Skip to content

Commit

Permalink
Fix invert astral capture regression (#89)
Browse files: browse the repository at this point in the history
  • Loading branch information
JLHwung authored Sep 13, 2024
1 parent 2ae1f91 commit 2a9179f
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 15 deletions.
3 changes: 1 addition & 2 deletions rewrite-pattern.js
Original file line number Diff line number Diff line change
Expand Up @@ -526,8 +526,7 @@ const processCharacterClass = (
} else {
// Generate negative set directly when case folding is not involved.
const negativeSet = UNICODE_SET.clone().remove(singleChars);
const bmpOnly = regenerateContainsAstral(negativeSet);
update(characterClassItem, negativeSet.toString({ bmpOnly: bmpOnly }));
update(characterClassItem, negativeSet.toString(regenerateOptions));
}
} else {
update(characterClassItem, `(?!${setStr})[^]`);
Expand Down
36 changes: 32 additions & 4 deletions tests/fixtures/character-class.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,31 +46,31 @@ const characterClassFixtures = [
flags: 'u',
matches: ["k", "\u212a", "\u{12345}", "\uDAAA", "\uDDDD"],
nonMatches: ["K"],
expected: '(?:[\\0-JL-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF])',
expected: '(?:[\\0-JL-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])',
options: { unicodeFlag: 'transform' }
},
{
pattern: '[^k]', // LATIN SMALL LETTER K
flags: 'u',
matches: ["K", "\u212a", "\u{12345}", "\uDAAA", "\uDDDD"],
nonMatches: ["k"],
expected: '(?:[\\0-jl-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF])',
expected: '(?:[\\0-jl-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])',
options: { unicodeFlag: 'transform' }
},
{
pattern: '[^\u212a]', // KELVIN SIGN
flags: 'u',
matches: ["K", "k", "\u{12345}", "\uDAAA", "\uDDDD"],
nonMatches: ["\u212a"],
expected: '(?:[\\0-\\u2129\\u212B-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF])',
expected: '(?:[\\0-\\u2129\\u212B-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])',
options: { unicodeFlag: 'transform' }
},
{
pattern: '[^\u{1D50E}]', // MATHEMATICAL FRAKTUR CAPITAL K
flags: 'u',
matches: ["K", "k", "\u{12345}", "\u{1D50F}", "\uDAAA", "\uDDDD"],
nonMatches: ["\u{1D50E}"],
expected: '(?:[\\0-\\uFFFF]|[\\uD800-\\uD834\\uD836-\\uDBFF][\\uDC00-\\uDFFF]|\\uD835[\\uDC00-\\uDD0D\\uDD0F-\\uDFFF])',
expected: '(?:[\\0-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uD834\\uD836-\\uDBFF][\\uDC00-\\uDFFF]|\\uD835[\\uDC00-\\uDD0D\\uDD0F-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])',
options: { unicodeFlag: 'transform' }
},
{
Expand All @@ -96,6 +96,34 @@ const characterClassFixtures = [
flags: 'u',
expected: '[^\u{1D50E}]',
options: {}
},
{
pattern: '^[^❤️]',
flags: 'u',
options: { unicodeFlag: 'transform' },
expected: '^(?:[\\0-\\u2763\\u2765-\\uD7FF\\uE000-\\uFE0E\\uFE10-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])',
nonMatches: ['❤️']
},
{
pattern: '^[^🧡]',
flags: 'u',
options: { unicodeFlag: 'transform' },
expected: '^(?:[\\0-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uD83D\\uD83F-\\uDBFF][\\uDC00-\\uDFFF]|\\uD83E[\\uDC00-\\uDDE0\\uDDE2-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])',
nonMatches: ['🧡']
},
{
pattern: '[^💛]',
flags: 'u',
options: { unicodeFlag: 'transform' },
expected: '(?:[\\0-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uD83C\\uD83E-\\uDBFF][\\uDC00-\\uDFFF]|\\uD83D[\\uDC00-\\uDC9A\\uDC9C-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])',
nonMatches: ['💛']
},
{
pattern: '[^💚]',
flags: 'u',
options: { unicodeFlag: 'transform' },
expected: '(?:[\\0-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uD83C\\uD83E-\\uDBFF][\\uDC00-\\uDFFF]|\\uD83D[\\uDC00-\\uDC99\\uDC9B-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])',
nonMatches: ['💚']
}
];

Expand Down
2 changes: 1 addition & 1 deletion tests/fixtures/unicode-set.js
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ const unicodeSetFixtures = [
pattern: '[^[a-z][f-h]]',
matches: ["A", "\u{12345}", "\uDAAA", "\uDDDD"],
nonMatches: ["a", "z"],
expected: '(?:[\\0-`\\{-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF])',
expected: '(?:[\\0-`\\{-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])',
options: TRANSFORM_U
},
{
Expand Down
2 changes: 1 addition & 1 deletion tests/fixtures/unicode.js
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ const unicodeFixtures = [
'matches': ['b', 'A', '\u{1D49C}', '\uDAAA', '\uDDDD'],
'nonMatches': ['a'],
'flags': FLAGS_WITH_UNICODE_WITHOUT_I,
'transpiled': '(?:[\\0-`b-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF])'
'transpiled': '(?:[\\0-`b-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])'
},
{
'pattern': '[^a]',
Expand Down
14 changes: 7 additions & 7 deletions tests/tests.js
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ describe('unicodePropertyEscapes', () => {
);
assert.equal(
rewritePattern('[^\\p{ASCII_Hex_Digit}_]', 'u', features),
'(?:[\\0-\\/:-@G-\\^`g-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF])'
'(?:[\\0-\\/:-@G-\\^`g-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])'
);
assert.equal(
rewritePattern('[\\P{Script_Extensions=Anatolian_Hieroglyphs}]', 'u', features),
Expand Down Expand Up @@ -422,11 +422,11 @@ describe('character classes', () => {
assert.strictEqual(transpiled, expected);
}
for (const match of fixture.matches || []) {
const transpiledRegex = new RegExp(`^${transpiled}$`, getOutputFlags(flags, options));
const transpiledRegex = new RegExp(transpiled, getOutputFlags(flags, options));
assert.match(match, transpiledRegex);
}
for (const nonMatch of fixture.nonMatches || []) {
const transpiledRegex = new RegExp(`^${transpiled}$`, getOutputFlags(flags, options));
const transpiledRegex = new RegExp(transpiled, getOutputFlags(flags, options));
assert.doesNotMatch(nonMatch, transpiledRegex);
}
});
Expand All @@ -451,11 +451,11 @@ describe('unicodeSets (v) flag', () => {
assert.strictEqual(transpiled, expected);
}
for (const match of fixture.matches || []) {
const transpiledRegex = new RegExp(`^${transpiled}$`, getOutputFlags(flag, options));
const transpiledRegex = new RegExp(transpiled, getOutputFlags(flag, options));
assert.match(match, transpiledRegex);
}
for (const nonMatch of fixture.nonMatches || []) {
const transpiledRegex = new RegExp(`^${transpiled}$`, getOutputFlags(flag, options));
const transpiledRegex = new RegExp(transpiled, getOutputFlags(flag, options));
assert.doesNotMatch(nonMatch, transpiledRegex);
}
});
Expand Down Expand Up @@ -493,11 +493,11 @@ describe('unicodeSets (v) flag', () => {
}
});
for (const match of fixture.matches || []) {
const transpiledRegex = new RegExp(`^${transpiled}$`, getOutputFlags(flags, options));
const transpiledRegex = new RegExp(transpiled, getOutputFlags(flags, options));
assert.match(match, transpiledRegex);
}
for (const nonMatch of fixture.nonMatches || []) {
const transpiledRegex = new RegExp(`^${transpiled}$`, getOutputFlags(flags, options));
const transpiledRegex = new RegExp(transpiled, getOutputFlags(flags, options));
assert.doesNotMatch(nonMatch, transpiledRegex);
}
}
Expand Down

0 comments on commit 2a9179f

Please sign in to comment.