Skip to content

Commit

Permalink
Merge pull request #644 from jvalue/perf-text-file
Browse files Browse the repository at this point in the history
[PERF] Don't split and join lines when using `TextFile`
  • Loading branch information
TungstnBallon authored Jan 29, 2025
2 parents fc14afc + 3e1898b commit 1f4dc40
Show file tree
Hide file tree
Showing 7 changed files with 342 additions and 59 deletions.
35 changes: 25 additions & 10 deletions libs/execution/src/lib/debugging/debug-log-visitor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import {
type WrapperFactoryProvider,
internalValueToString,
} from '@jvalue/jayvee-language-server';
import { either } from 'fp-ts';

import { type Logger } from '../logging/logger';
import { type Workbook } from '../types';
Expand All @@ -15,6 +16,7 @@ import { type TextFile } from '../types/io-types/filesystem-node-file-text';
import { type IoTypeVisitor } from '../types/io-types/io-type-implementation';
import { type Sheet } from '../types/io-types/sheet';
import { type Table } from '../types/io-types/table';
import { findLineBounds } from '../util/string-util';

import { type DebugGranularity } from './debug-configuration';

Expand Down Expand Up @@ -115,18 +117,31 @@ export class DebugLogVisitor implements IoTypeVisitor<void> {
this.logPeekComment();
}

visitTextFile(binaryFile: TextFile): void {
if (this.debugGranularity === 'minimal') {
return;
}

for (let i = 0; i < binaryFile.content.length; ++i) {
if (i > this.PEEK_NUMBER_OF_LINES) {
break;
visitTextFile(textFile: TextFile): void {
switch (this.debugGranularity) {
case 'minimal':
return;

case 'exhaustive':
this.log(textFile.content);
return;

case 'peek': {
// BUG: /\r?\n/ might not be the correct line break
const result = findLineBounds(
[this.PEEK_NUMBER_OF_LINES - 1],
/\r?\n/,
textFile.content,
);
const { start: firsNonIncludedLineStart } = (either.isRight(result)
? result.right[0]
: undefined) ?? {
start: textFile.content.length,
};
this.log(textFile.content.slice(0, firsNonIncludedLineStart));
this.logPeekComment();
}
this.log(`[Line ${i}] ${binaryFile.content[i] ?? '<undefined>'}`);
}
this.logPeekComment();
}

private logPeekComment(): void {
Expand Down
1 change: 1 addition & 0 deletions libs/execution/src/lib/util/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@

export * from './implements-static-decorator';
export * from './file-util';
export * from './string-util';
90 changes: 90 additions & 0 deletions libs/execution/src/lib/util/string-util.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// SPDX-FileCopyrightText: 2025 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

// eslint-disable-next-line unicorn/prefer-node-protocol
import { AssertionError } from 'assert';

import { either } from 'fp-ts';

import { ensureGlobal, findLineBounds } from './string-util';

describe('Validation of string-util', () => {
describe('Function ensureGlobal', () => {
it('should make a non global RegExp global', () => {
const result = ensureGlobal(/someregex/);

expect(result.global).toBe(true);
expect(result.source).toBe('someregex');
});
it('should keep a global RegExp global', () => {
const result = ensureGlobal(/someregex/g);

expect(result.global).toBe(true);
expect(result.source).toBe('someregex');
});
});
describe('Function findLineBounds', () => {
it('should return empty array for empty array', () => {
const result = findLineBounds([], /\r?\n/, 'some text');

expect(either.isRight(result)).toBe(true);
assert(either.isRight(result));
expect(result.right).toStrictEqual([]);
});
it('should return first non existent lineIdx', () => {
const result = findLineBounds(
[0, 30, 300],
/\r?\n/,
`some text
`,
);

expect(either.isLeft(result)).toBe(true);
assert(either.isLeft(result));
expect(result.left.firstNonExistentLineIdx).toBe(30);
expect(result.left.existingBounds).toStrictEqual([
{ start: 0, length: 10 },
]);
});
it('should return the entire string if there is no newline', () => {
const result = findLineBounds(
[0, 1],
/\r?\n/,
'some text without a newline',
);

expect(either.isLeft(result)).toBe(true);
assert(either.isLeft(result));
expect(result.left.firstNonExistentLineIdx).toBe(1);
expect(result.left.existingBounds).toStrictEqual([
{ start: 0, length: 27 },
]);
});
it('should correctly map multiple indices', () => {
const result = findLineBounds(
[0, 1, 2, 3],
/\r?\n/,
`some
text with
newlines
`,
);

expect(either.isLeft(result)).toBe(true);
assert(either.isLeft(result));
expect(result.left.firstNonExistentLineIdx).toBe(3);
expect(result.left.existingBounds).toStrictEqual([
{ start: 0, length: 5 },
{ start: 5, length: 11 },
{ start: 16, length: 9 },
]);
});
it('should throw an error on out of order indices', () => {
expect(() => findLineBounds([1, 0], /\r?\n/, '')).toThrowError(
AssertionError,
);
});
});
});
125 changes: 125 additions & 0 deletions libs/execution/src/lib/util/string-util.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
// SPDX-FileCopyrightText: 2025 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

// eslint-disable-next-line unicorn/prefer-node-protocol
import assert from 'assert';

import { either } from 'fp-ts';

export function ensureGlobal(regex: RegExp): RegExp {
if (regex.global) {
return regex;
}

return RegExp(regex.source, regex.flags + 'g');
}

function isSortedAscending(numbers: number[]): boolean {
return numbers.every((lineIdx, i, arr) => {
if (i === 0) {
return true;
}
const prev = arr[i - 1];
assert(prev !== undefined);
return prev <= lineIdx;
});
}

function findSingleLineBounds(
searchIdx: number,
lineBreakPattern: RegExp,
text: string,
): { start: number; length: number } | undefined {
let currentLineIdx = 0;
let currentLineStart = 0;

for (const lineBreak of text.matchAll(ensureGlobal(lineBreakPattern))) {
assert(currentLineIdx <= searchIdx);
if (currentLineIdx < searchIdx) {
currentLineIdx += 1;
currentLineStart += lineBreak.index + 1;
continue;
}

const lineLengthWithoutNewline = lineBreak.index - currentLineStart;
return {
start: currentLineStart,
length: lineLengthWithoutNewline + 1,
};
}

// HINT: Line with idx `lineIdx` not found.
if (currentLineIdx !== searchIdx) {
return undefined;
}
return {
start: currentLineStart,
length: text.length - currentLineStart,
};
}

type Bounds = { start: number; length: number }[];

/**
* Map line idxs to line bounds.
*
* @param lineIdxs the indices of the lines to find bounds for. MUST be sorted in ASCENDING order.
* @param lineBreakPattern the pattern that marks a new line.
* @param text the text containing newlines.
* @returns a new array which contains either the bounds for the requested line or undefined
*
* @example
* let [{start, length}, outOfBounds ] = findLineBounds("some\ntext\n", /\r?\n/, [0, 300]);
* assert(inclusiveStart === 0);
* assert(length === 5);
* assert(outOfBounds === undefined);
*/
export function findLineBounds(
lineIdxs: number[],
lineBreakPattern: RegExp,
text: string,
): either.Either<
{ existingBounds: Bounds; firstNonExistentLineIdx: number },
Bounds
> {
assert(isSortedAscending(lineIdxs));
let lineIdxOffset = 0;
let charIdxOffset = 0;

const bounds: { start: number; length: number }[] = [];

for (const searchIdx of lineIdxs) {
if (searchIdx > 0 && text.length === 0) {
return either.left({
existingBounds: bounds,
firstNonExistentLineIdx: searchIdx,
});
}
assert(searchIdx >= lineIdxOffset);
const tmp = findSingleLineBounds(
searchIdx - lineIdxOffset,
lineBreakPattern,
text,
);
if (tmp === undefined) {
return either.left({
existingBounds: bounds,
firstNonExistentLineIdx: searchIdx,
});
}

const { start, length } = tmp;

bounds.push({
start: charIdxOffset + start,
length,
});

charIdxOffset += start + length;
lineIdxOffset = searchIdx + 1;
text = text.slice(length);
}

return either.right(bounds);
}
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ Test File
expect(R.isOk(result)).toEqual(false);
if (R.isErr(result)) {
expect(result.left.message).toEqual(
'Line 1 does not exist in the text file, only 0 line(s) are present',
'Line 1 does not exist in the text file.',
);
}
});
Expand Down
Loading

0 comments on commit 1f4dc40

Please sign in to comment.