From 304776da284c5210402432e6ba31311810b74035 Mon Sep 17 00:00:00 2001 From: Steven Levithan Date: Sat, 11 Jan 2025 15:34:13 +0100 Subject: [PATCH] Support assertions/directives preceding \G subclass strategies --- spec/match-search-start.spec.js | 37 ++++++++++++++++++++++----------- src/subclass.js | 34 ++++++++++++++---------------- 2 files changed, 41 insertions(+), 30 deletions(-) diff --git a/spec/match-search-start.spec.js b/spec/match-search-start.spec.js index 6130064..b114b11 100644 --- a/spec/match-search-start.spec.js +++ b/spec/match-search-start.spec.js @@ -158,39 +158,52 @@ describe('Assertion: search_start', () => { describe('subclass strategies', () => { // Leading `(^|\G)` and similar it('should apply line_or_search_start', () => { - // Matches with `^` since not global + // ## Leading + // Match uses the `^` since not global expect(toRegExp(r`(^|\G)a`).exec('b\na')?.index).toBe(2); // Matched `a`s are the first three and last one expect('aaabaaacaa\na'.match(toRegExp(r`(^|\G)a`, {global: true}))).toEqual(['a', 'a', 'a', 'a']); expect(toRegExp(r`(?:^|\G)a`).exec('b\na')?.index).toBe(2); expect(toRegExp(r`(\G|^)a`).exec('b\na')?.index).toBe(2); + expect(toRegExp(r`(?\G|^)a`).exec('b\na')?.index).toBe(2); expect(toRegExp(r`(?:(\G|^)a)`).exec('b\na')?.index).toBe(2); expect(toRegExp(r`((\G|^)a)`).exec('b\na')?.index).toBe(2); - - // Updates match indices accurately + // ## With preceding directive/s + expect(toRegExp(r`(?i)(^|\G)a`).exec('b\na')?.index).toBe(2); + expect(toRegExp(r`(?i)(?x)(^|\G)a`).exec('b\na')?.index).toBe(2); + // ## With preceding assertion/s + expect(toRegExp(r`(?=a)(^|\G)a`).exec('b\na')?.index).toBe(2); + expect(toRegExp(r`(?=a)(?!b)\b(^|\G)a`).exec('b\na')?.index).toBe(2); + // ## Match indices on results are accurate const re = toRegExp(r`(?^|\G)a`, {global: true, hasIndices: true}); re.lastIndex = 2; - expect(re.exec('12a').indices[0][0]).toBe(2); - re.lastIndex = 2; - expect(re.exec('12a').indices.groups.n[0]).toBe(2); + const match = re.exec('12a'); + expect(match.indices[0][0]).toBe(2); + expect(match.indices.groups.n[0]).toBe(2); }); // Leading `(?!\G)` and similar it('should apply not_search_start', () => { - // Leading + // ## Leading expect(toRegExp(r`(?!\G)a`).exec('aba')?.index).toBe(2); expect(toRegExp(r`(? toDetails(r`\G(?!\G)`)).toThrow(); + expect(() => toDetails(r`(?=\G)(?!\G)`)).toThrow(); + // ## With preceding non-zero-length node expect(() => toDetails(r`a(?!\G)a`)).toThrow(); expect(() => toDetails(r`a+(?!\G)a`)).toThrow(); - // Has preceding min-zero-length quantified node + // ## With preceding min-zero-length quantified node expect(() => toDetails(r`a*(?!\G)a`)).toThrow(); // expect(toRegExp(r`a*(?!\G)a`).exec('abcaaa')?.[0]).toBe('aaa'); // expect('abcaaa'.match(toRegExp(r`a*(?!\G)`, {global}))).toEqual(['a', '', 'aaa']); diff --git a/src/subclass.js b/src/subclass.js index d6f4e68..c6ef19d 100644 --- a/src/subclass.js +++ b/src/subclass.js @@ -145,12 +145,11 @@ function adjustMatchDetails(str, re, match, offset) { } // Special case AST transformation handling that requires coupling with a `RegExp` subclass (see -// `EmulatedRegExp`). These changes add emulation support for some common patterns that are -// otherwise unsupportable. Only one subclass strategy is supported per pattern +// `EmulatedRegExp`). These changes add emulation support for some patterns that are otherwise +// unsupportable. Only one subclass strategy is supported per pattern function applySubclassStrategies(ast) { const alts = ast.pattern.alternatives; const firstEl = alts[0].elements[0]; - if (alts.length > 1 || !firstEl) { // These strategies only work if there's no top-level alternation return null; @@ -163,8 +162,14 @@ function applySubclassStrategies(ast) { firstEl.alternatives.length === 1; const singleAltIn = hasWrapperGroup ? firstEl.alternatives[0] : alts[0]; // First el within first group if the group doesn't contain top-level alternation, else just the - // first el of the pattern; ex: a flag group might enclose the full pattern - const firstElIn = hasWrapperGroup ? singleAltIn.elements[0] : firstEl; + // first el of the pattern; ex: a flag group might enclose the full pattern. Skips assertions + // like `\b` and directives like `(?i)` when considering the first element + const firstElInIndex = singleAltIn.elements.findIndex(el => ( + el.kind === AstAssertionKinds.search_start || + isLoneGLookaround(el) || + !isAlwaysZeroLength(el) + )); + const firstElIn = singleAltIn.elements[firstElInIndex]; if (!firstElIn) { return null; } @@ -195,28 +200,21 @@ function applySubclassStrategies(ast) { // ## Strategy `not_search_start`: Support leading `(?!\G)` and similar if (isLoneGLookaround(firstElIn, {negate: true})) { // Remove the `\G` and its containing negative lookaround - firstElIn.parent.elements.shift(); + firstElIn.parent.elements.splice(firstElInIndex, 1); return 'not_search_start'; } - for (let i = 0; i < singleAltIn.elements.length; i++) { - const el = singleAltIn.elements[i]; - if (!isAlwaysZeroLength(el)) { - break; - } - if (isLoneGLookaround(el, {negate: true})) { - // Remove the `\G` and its containing negative lookaround - singleAltIn.elements.splice(i, 1); - return 'not_search_start'; - } - } return null; } function isLoneGLookaround(node, options) { + const opts = { + negate: null, + ...options, + }; return ( isLookaround(node) && - node.negate === options.negate && + (opts.negate === null || node.negate === opts.negate) && hasOnlyChild(node, kid => kid.kind === AstAssertionKinds.search_start) ); }