Skip to content

Commit

Permalink
Support assertions/directives preceding \G subclass strategies
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Jan 11, 2025
1 parent 730d334 commit 304776d
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 30 deletions.
37 changes: 25 additions & 12 deletions spec/match-search-start.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -158,39 +158,52 @@ describe('Assertion: search_start', () => {
describe('subclass strategies', () => {
// Leading `(^|\G)` and similar
it('should apply line_or_search_start', () => {
  // ## Leading
  // Match uses the `^` since not global
  expect(toRegExp(r`(^|\G)a`).exec('b\na')?.index).toBe(2);
  // Matched `a`s are the first three and last one
  expect('aaabaaacaa\na'.match(toRegExp(r`(^|\G)a`, {global: true}))).toEqual(['a', 'a', 'a', 'a']);
  // Equivalent forms: noncapturing/named groups, reversed alternatives, and wrapper groups
  expect(toRegExp(r`(?:^|\G)a`).exec('b\na')?.index).toBe(2);
  expect(toRegExp(r`(\G|^)a`).exec('b\na')?.index).toBe(2);
  expect(toRegExp(r`(?<n>\G|^)a`).exec('b\na')?.index).toBe(2);
  expect(toRegExp(r`(?:(\G|^)a)`).exec('b\na')?.index).toBe(2);
  expect(toRegExp(r`((\G|^)a)`).exec('b\na')?.index).toBe(2);

  // ## With preceding directive/s
  expect(toRegExp(r`(?i)(^|\G)a`).exec('b\na')?.index).toBe(2);
  expect(toRegExp(r`(?i)(?x)(^|\G)a`).exec('b\na')?.index).toBe(2);
  // ## With preceding assertion/s
  expect(toRegExp(r`(?=a)(^|\G)a`).exec('b\na')?.index).toBe(2);
  expect(toRegExp(r`(?=a)(?!b)\b(^|\G)a`).exec('b\na')?.index).toBe(2);
  // ## Match indices on results are accurate
  const re = toRegExp(r`(?<n>^|\G)a`, {global: true, hasIndices: true});
  // Search from index 2 of '12a', where `^` can't match
  re.lastIndex = 2;
  // Run `exec` once and reuse the result; calling it again on this global regex without resetting
  // `lastIndex` would resume after the previous match and return `null`
  const match = re.exec('12a');
  expect(match.indices[0][0]).toBe(2);
  expect(match.indices.groups.n[0]).toBe(2);
});

// Leading `(?!\G)` and similar
it('should apply not_search_start', () => {
// Leading
// ## Leading
expect(toRegExp(r`(?!\G)a`).exec('aba')?.index).toBe(2);
expect(toRegExp(r`(?<!\G)a`).exec('aba')?.index).toBe(2);
expect(toRegExp(r`(?:(?!\G)a)`).exec('aba')?.index).toBe(2);
expect(toRegExp(r`((?!\G)a)`).exec('aba')?.index).toBe(2);
// Has preceding zero-length node/s
// ## With preceding directive/s
expect(toRegExp(r`(?i)(?!\G)`).exec(';;')?.index).toBe(1);
expect(toRegExp(r`(?i)(?x)(?!\G)`).exec(';;')?.index).toBe(1);
// ## With preceding assertion/s
expect(toRegExp(r`(?<=;)(?!\G)`).exec(';;')?.index).toBe(1);
expect(toRegExp(r`(?!\G)(?=;)^`).exec(';;\n;')?.index).toBe(3);
expect(toRegExp(r`(?=;)(?!\G)^`).exec(';;\n;')?.index).toBe(3);
expect(toRegExp(r`(?=;)^(?!\G)`).exec(';;\n;')?.index).toBe(3);
// Has preceding non-zero-length node
expect(toRegExp(r`(?=;)(?!\G)^`).exec(';;\n;')?.index).toBe(3);
expect(toRegExp(r`(?!\G)(?=;)^`).exec(';;\n;')?.index).toBe(3);
// ## With preceding `\G`
expect(() => toDetails(r`\G(?!\G)`)).toThrow();
expect(() => toDetails(r`(?=\G)(?!\G)`)).toThrow();
// ## With preceding non-zero-length node
expect(() => toDetails(r`a(?!\G)a`)).toThrow();
expect(() => toDetails(r`a+(?!\G)a`)).toThrow();
// Has preceding min-zero-length quantified node
// ## With preceding min-zero-length quantified node
expect(() => toDetails(r`a*(?!\G)a`)).toThrow();
// expect(toRegExp(r`a*(?!\G)a`).exec('abcaaa')?.[0]).toBe('aaa');
// expect('abcaaa'.match(toRegExp(r`a*(?!\G)`, {global}))).toEqual(['a', '', 'aaa']);
Expand Down
34 changes: 16 additions & 18 deletions src/subclass.js
Original file line number Diff line number Diff line change
Expand Up @@ -145,12 +145,11 @@ function adjustMatchDetails(str, re, match, offset) {
}

// Special case AST transformation handling that requires coupling with a `RegExp` subclass (see
// `EmulatedRegExp`). These changes add emulation support for some patterns that are otherwise
// unsupportable. Only one subclass strategy is supported per pattern
function applySubclassStrategies(ast) {
const alts = ast.pattern.alternatives;
const firstEl = alts[0].elements[0];

if (alts.length > 1 || !firstEl) {
// These strategies only work if there's no top-level alternation
return null;
Expand All @@ -163,8 +162,14 @@ function applySubclassStrategies(ast) {
firstEl.alternatives.length === 1;
const singleAltIn = hasWrapperGroup ? firstEl.alternatives[0] : alts[0];
// First el within first group if the group doesn't contain top-level alternation, else just the
// first el of the pattern; ex: a flag group might enclose the full pattern
const firstElIn = hasWrapperGroup ? singleAltIn.elements[0] : firstEl;
// first el of the pattern; ex: a flag group might enclose the full pattern. Skips assertions
// like `\b` and directives like `(?i)` when considering the first element
const firstElInIndex = singleAltIn.elements.findIndex(el => (
el.kind === AstAssertionKinds.search_start ||
isLoneGLookaround(el) ||
!isAlwaysZeroLength(el)
));
const firstElIn = singleAltIn.elements[firstElInIndex];
if (!firstElIn) {
return null;
}
Expand Down Expand Up @@ -195,28 +200,21 @@ function applySubclassStrategies(ast) {
// ## Strategy `not_search_start`: Support leading `(?!\G)` and similar
if (isLoneGLookaround(firstElIn, {negate: true})) {
// Remove the `\G` and its containing negative lookaround
firstElIn.parent.elements.shift();
firstElIn.parent.elements.splice(firstElInIndex, 1);
return 'not_search_start';
}
for (let i = 0; i < singleAltIn.elements.length; i++) {
const el = singleAltIn.elements[i];
if (!isAlwaysZeroLength(el)) {
break;
}
if (isLoneGLookaround(el, {negate: true})) {
// Remove the `\G` and its containing negative lookaround
singleAltIn.elements.splice(i, 1);
return 'not_search_start';
}
}

return null;
}

// Checks whether `node` is a lookaround whose only child is a search-start (`\G`) assertion.
// `options.negate` optionally restricts the lookaround's polarity: when it's `true` or `false`,
// `node.negate` must equal it; when it's `null` (the default, also used if `options` is omitted),
// lookarounds of either polarity are accepted
function isLoneGLookaround(node, options) {
  // Spreading an omitted (`undefined`) `options` adds nothing, so the default is kept and the
  // function is safe to call with just a node
  const opts = {
    negate: null,
    ...options,
  };
  return (
    isLookaround(node) &&
    (opts.negate === null || node.negate === opts.negate) &&
    hasOnlyChild(node, kid => kid.kind === AstAssertionKinds.search_start)
  );
}
Expand Down

0 comments on commit 304776d

Please sign in to comment.