diff --git a/.gitignore b/.gitignore index bdae837bd2..50ac91409b 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ # Generated by gradle task "generateGrammarSource" src/main/java/com/maddyhome/idea/vim/vimscript/parser/generated +vim-engine/src/main/java/com/maddyhome/idea/vim/regexp/parser/generated # Generated JSONs for lazy classloading /vim-engine/src/main/resources/ksp-generated /src/main/resources/ksp-generated diff --git a/src/main/java/com/maddyhome/idea/vim/extension/multiplecursors/VimMultipleCursorsExtension.kt b/src/main/java/com/maddyhome/idea/vim/extension/multiplecursors/VimMultipleCursorsExtension.kt index fd99ca26fd..8f031439e4 100644 --- a/src/main/java/com/maddyhome/idea/vim/extension/multiplecursors/VimMultipleCursorsExtension.kt +++ b/src/main/java/com/maddyhome/idea/vim/extension/multiplecursors/VimMultipleCursorsExtension.kt @@ -246,7 +246,7 @@ internal class VimMultipleCursorsExtension : VimExtension { // Note that ignoreCase is not overridden by the `\C` in the pattern val pattern = makePattern(text, whole) - val matches = SearchHelper.findAll(editor, pattern, 0, -1, false) + val matches = injector.searchHelper.findAll(IjVimEditor(editor), pattern, 0, -1, false) for (match in matches) { if (match.contains(primaryCaret.offset)) { primaryCaret.vim.moveToOffset(match.startOffset) @@ -322,7 +322,7 @@ internal class VimMultipleCursorsExtension : VimExtension { searchOptions.add(SearchOptions.WRAP) } - return SearchHelper.findPattern(editor, makePattern(text, whole), startOffset, 1, searchOptions)?.startOffset ?: -1 + return injector.searchHelper.findPattern(IjVimEditor(editor), makePattern(text, whole), startOffset, 1, searchOptions)?.startOffset ?: -1 } private fun makePattern(text: String, whole: Boolean): String { diff --git a/src/main/java/com/maddyhome/idea/vim/group/IjOptionProperties.kt b/src/main/java/com/maddyhome/idea/vim/group/IjOptionProperties.kt index 3bc13d9e05..dce78db4d3 100644 --- 
a/src/main/java/com/maddyhome/idea/vim/group/IjOptionProperties.kt +++ b/src/main/java/com/maddyhome/idea/vim/group/IjOptionProperties.kt @@ -35,6 +35,9 @@ public open class GlobalIjOptions(scope: OptionAccessScope) : OptionsPropertiesB public var oldundo: Boolean by optionProperty(IjOptions.oldundo) public var unifyjumps: Boolean by optionProperty(IjOptions.unifyjumps) public var exCommandAnnotation: Boolean by optionProperty(IjOptions.exCommandAnnotation) + public var vimscriptFunctionAnnotation: Boolean by optionProperty(IjOptions.vimscriptFunctionAnnotation) + public var commandOrMotionAnnotation: Boolean by optionProperty(IjOptions.commandOrMotionAnnotation) + public var useNewRegex: Boolean by optionProperty(IjOptions.useNewRegex) } /** diff --git a/src/main/java/com/maddyhome/idea/vim/group/IjOptions.kt b/src/main/java/com/maddyhome/idea/vim/group/IjOptions.kt index 3514c9e5e2..167f87a30c 100644 --- a/src/main/java/com/maddyhome/idea/vim/group/IjOptions.kt +++ b/src/main/java/com/maddyhome/idea/vim/group/IjOptions.kt @@ -84,7 +84,10 @@ public object IjOptions { public val unifyjumps: ToggleOption = addOption(ToggleOption("unifyjumps", GLOBAL, "unifyjumps", true)) public val visualdelay: UnsignedNumberOption = addOption(UnsignedNumberOption("visualdelay", GLOBAL, "visualdelay", 100)) public val oldundo: ToggleOption = addOption(ToggleOption("oldundo", GLOBAL, "oldundo", false, isTemporary = true)) + public val vimscriptFunctionAnnotation: ToggleOption = addOption(ToggleOption("vimscriptfunctionannotation", GLOBAL, "vimscriptfunctionannotation", true, isTemporary = true)) + public val commandOrMotionAnnotation: ToggleOption = addOption(ToggleOption("commandormotionannotation", GLOBAL, "commandormotionannotation", true, isTemporary = true)) public val showmodewidget: ToggleOption = addOption(ToggleOption("showmodewidget", GLOBAL, "showmodewidget", false, isTemporary = true)) + public val useNewRegex: ToggleOption = addOption(ToggleOption("usenewregex", GLOBAL, 
"usenewregex", true, isTemporary = true)) // This needs to be Option so that it can work with derived option types, such as NumberOption, which // derives from Option diff --git a/src/main/java/com/maddyhome/idea/vim/group/ProcessGroup.kt b/src/main/java/com/maddyhome/idea/vim/group/ProcessGroup.kt index 05658411b3..3c7e211dff 100644 --- a/src/main/java/com/maddyhome/idea/vim/group/ProcessGroup.kt +++ b/src/main/java/com/maddyhome/idea/vim/group/ProcessGroup.kt @@ -51,6 +51,8 @@ import javax.swing.SwingUtilities public class ProcessGroup : VimProcessGroupBase() { override var lastCommand: String? = null private set + override var isCommandProcessing: Boolean = false + override var modeBeforeCommandProcessing: Mode? = null public override fun startSearchCommand(editor: VimEditor, context: ExecutionContext, count: Int, leader: Char) { // Don't allow searching in one line editors @@ -79,6 +81,8 @@ public class ProcessGroup : VimProcessGroupBase() { "Cannot enable cmd mode from current mode $currentMode" } + isCommandProcessing = true + modeBeforeCommandProcessing = currentMode val initText = getRange(editor, cmd) injector.markService.setVisualSelectionMarks(editor) editor.vimStateMachine.mode = Mode.CMD_LINE(currentMode) @@ -134,6 +138,9 @@ public class ProcessGroup : VimProcessGroupBase() { logger.error(bad) VimPlugin.indicateError() res = false + } finally { + isCommandProcessing = false + modeBeforeCommandProcessing = null } return res diff --git a/src/main/java/com/maddyhome/idea/vim/group/SearchGroup.java b/src/main/java/com/maddyhome/idea/vim/group/SearchGroup.java index 773dd38445..c9779faa6b 100644 --- a/src/main/java/com/maddyhome/idea/vim/group/SearchGroup.java +++ b/src/main/java/com/maddyhome/idea/vim/group/SearchGroup.java @@ -36,10 +36,9 @@ import com.maddyhome.idea.vim.newapi.IjEditorExecutionContext; import com.maddyhome.idea.vim.newapi.IjVimCaret; import com.maddyhome.idea.vim.newapi.IjVimEditor; +import com.maddyhome.idea.vim.newapi.IjVimSearchGroup; 
import com.maddyhome.idea.vim.options.GlobalOptionChangeListener; -import com.maddyhome.idea.vim.regexp.CharPointer; -import com.maddyhome.idea.vim.regexp.CharacterClasses; -import com.maddyhome.idea.vim.regexp.RegExp; +import com.maddyhome.idea.vim.regexp.*; import com.maddyhome.idea.vim.ui.ModalEntry; import com.maddyhome.idea.vim.ui.ex.ExEntryPanel; import com.maddyhome.idea.vim.vimscript.model.VimLContext; @@ -62,34 +61,46 @@ import static com.maddyhome.idea.vim.api.VimInjectorKt.*; import static com.maddyhome.idea.vim.helper.HelperKt.localEditors; import static com.maddyhome.idea.vim.helper.SearchHelperKtKt.shouldIgnoreCase; +import static com.maddyhome.idea.vim.newapi.IjVimInjectorKt.globalIjOptions; import static com.maddyhome.idea.vim.register.RegisterConstants.LAST_SEARCH_REGISTER; @State(name = "VimSearchSettings", storages = { @Storage(value = "$APP_CONFIG$/vim_settings_local.xml", roamingType = RoamingType.DISABLED) }) -public class SearchGroup extends VimSearchGroupBase implements PersistentStateComponent { +@Deprecated +/** + * @deprecated Replace with IjVimSearchGroup + */ +public class SearchGroup extends IjVimSearchGroup implements PersistentStateComponent { public SearchGroup() { - // TODO: Investigate migrating these listeners to use the effective value change listener - // This would allow us to update the editor we're told to update, rather than looping over all projects and updating - // the highlights in that project's current document's open editors (see VIM-2779). - // However, we probably only want to update the editors associated with the current document, so maybe the whole - // code needs to be reworked. We're currently using the same update code for changes in the search term as well as - // changes in the search options. 
- VimPlugin.getOptionGroup().addGlobalOptionChangeListener(Options.hlsearch, () -> { - resetShowSearchHighlight(); - forceUpdateSearchHighlights(); - }); - - final GlobalOptionChangeListener updateHighlightsIfVisible = () -> { - if (showSearchHighlight) { + super(); + if (!globalIjOptions(injector).getUseNewRegex()) { + // TODO: Investigate migrating these listeners to use the effective value change listener + // This would allow us to update the editor we're told to update, rather than looping over all projects and updating + // the highlights in that project's current document's open editors (see VIM-2779). + // However, we probably only want to update the editors associated with the current document, so maybe the whole + // code needs to be reworked. We're currently using the same update code for changes in the search term as well as + // changes in the search options. + VimPlugin.getOptionGroup().addGlobalOptionChangeListener(Options.hlsearch, () -> { + resetShowSearchHighlight(); forceUpdateSearchHighlights(); - } - }; - VimPlugin.getOptionGroup().addGlobalOptionChangeListener(Options.ignorecase, updateHighlightsIfVisible); - VimPlugin.getOptionGroup().addGlobalOptionChangeListener(Options.smartcase, updateHighlightsIfVisible); + }); + + final GlobalOptionChangeListener updateHighlightsIfVisible = () -> { + if (showSearchHighlight) { + forceUpdateSearchHighlights(); + } + }; + VimPlugin.getOptionGroup().addGlobalOptionChangeListener(Options.ignorecase, updateHighlightsIfVisible); + VimPlugin.getOptionGroup().addGlobalOptionChangeListener(Options.smartcase, updateHighlightsIfVisible); + } } public void turnOn() { + if (globalIjOptions(injector).getUseNewRegex()) { + super.updateSearchHighlights(false); + return; + } updateSearchHighlights(); } @@ -100,7 +111,12 @@ public void turnOff() { } @TestOnly + @Override public void resetState() { + if (globalIjOptions(injector).getUseNewRegex()) { + super.resetState(); + return; + } lastPatternIdx = RE_SEARCH; 
lastSearch = lastSubstitute = lastReplace = null; lastPatternOffset = ""; @@ -114,7 +130,9 @@ public void resetState() { * * @return The pattern used for last search. Can be null */ + @Override public @Nullable String getLastSearchPattern() { + if (globalIjOptions(injector).getUseNewRegex()) return super.getLastSearchPattern(); return lastSearch; } @@ -122,7 +140,9 @@ public void resetState() { * Get the last pattern used in substitution. * @return The pattern used for the last substitute command. Can be null */ + @Override public @Nullable String getLastSubstitutePattern() { + if (globalIjOptions(injector).getUseNewRegex()) return super.getLastSubstitutePattern(); return lastSubstitute; } @@ -131,7 +151,9 @@ public void resetState() { * * @return The pattern last used for either searching or substitution. Can be null */ - public @Nullable String getLastUsedPattern() { + @Override + protected @Nullable String getLastUsedPattern() { + if (globalIjOptions(injector).getUseNewRegex()) return super.getLastUsedPattern(); switch (lastPatternIdx) { case RE_SEARCH: return lastSearch; case RE_SUBST: return lastSubstitute; @@ -195,6 +217,10 @@ private void setLastUsedPattern(@NotNull String pattern, int which_pat, boolean @TestOnly public void setLastSearchState(@SuppressWarnings("unused") @NotNull Editor editor, @NotNull String pattern, @NotNull String patternOffset, Direction direction) { + if (globalIjOptions(injector).getUseNewRegex()) { + super.setLastSearchState(pattern, patternOffset, direction); + return; + } setLastUsedPattern(pattern, RE_SEARCH, true); lastIgnoreSmartCase = false; lastPatternOffset = patternOffset; @@ -226,7 +252,7 @@ public void setLastSearchState(@SuppressWarnings("unused") @NotNull Editor edito int startLine, int endLine, boolean ignoreCase) { - return SearchHelper.findAll(editor, pattern, startLine, endLine, ignoreCase); + return injector.getSearchHelper().findAll(new IjVimEditor(editor), pattern, startLine, endLine, ignoreCase); } /** @@ -254,6 
+280,8 @@ public void setLastSearchState(@SuppressWarnings("unused") @NotNull Editor edito */ @Override public int processSearchCommand(@NotNull VimEditor editor, @NotNull String command, int startOffset, @NotNull Direction dir) { + if (globalIjOptions(injector).getUseNewRegex()) return super.processSearchCommand(editor, command, startOffset, dir); + boolean isNewPattern = false; String pattern = null; String patternOffset = null; @@ -414,6 +442,7 @@ public int processSearchRange(@NotNull Editor editor, @NotNull String pattern, i */ @Override public int searchWord(@NotNull VimEditor editor, @NotNull ImmutableVimCaret caret, int count, boolean whole, @NotNull Direction dir) { + if (globalIjOptions(injector).getUseNewRegex()) return super.searchWord(editor, caret, count, whole, dir); TextRange range = SearchHelper.findWordUnderCursor(((IjVimEditor)editor).getEditor(), ((IjVimCaret)caret).getCaret()); if (range == null) { logger.warn("No range was found"); @@ -455,6 +484,7 @@ public int searchWord(@NotNull VimEditor editor, @NotNull ImmutableVimCaret care */ @Override public int searchNext(@NotNull VimEditor editor, @NotNull ImmutableVimCaret caret, int count) { + if (globalIjOptions(injector).getUseNewRegex()) return super.searchNext(editor, caret, count); return searchNextWithDirection(((IjVimEditor)editor).getEditor(), ((IjVimCaret)caret).getCaret(), count, lastDir); } @@ -471,6 +501,7 @@ public int searchNext(@NotNull VimEditor editor, @NotNull ImmutableVimCaret care */ @Override public int searchPrevious(@NotNull VimEditor editor, @NotNull ImmutableVimCaret caret, int count) { + if (globalIjOptions(injector).getUseNewRegex()) return super.searchPrevious(editor, caret, count); return searchNextWithDirection(((IjVimEditor)editor).getEditor(), ((IjVimCaret)caret).getCaret(), count, lastDir.reverse()); } @@ -524,6 +555,8 @@ public boolean processSubstituteCommand(@NotNull VimEditor editor, @NotNull @NonNls String excmd, @NotNull @NonNls String exarg, @NotNull 
VimLContext parent) { + if (globalIjOptions(injector).getUseNewRegex()) return super.processSubstituteCommand(editor, caret, range, excmd, exarg, parent); + // Explicitly exit visual mode here, so that visual mode marks don't change when we move the cursor to a match. List exceptions = new ArrayList<>(); if (CommandStateHelper.inVisualMode(((IjVimEditor) editor).getEditor())) { @@ -687,8 +720,7 @@ else if (cmd.charAt() != 'p' && cmd.charAt() != 'l' && cmd.charAt() != '#' && cm return false; } - Pair> booleanregmmatch_tPair = search_regcomp(pat, which_pat, - RE_SUBST); + Pair> booleanregmmatch_tPair = search_regcomp(pat, which_pat, RE_SUBST); if (!booleanregmmatch_tPair.getFirst()) { if (do_error) { VimPlugin.showMessage(MessageHelper.message(Msg.e_invcmd)); @@ -696,9 +728,9 @@ else if (cmd.charAt() != 'p' && cmd.charAt() != 'l' && cmd.charAt() != '#' && cm } return false; } - RegExp.regmmatch_T regmatch = (RegExp.regmmatch_T) booleanregmmatch_tPair.getSecond().getFirst(); + RegExp.regmmatch_T regmatch = (RegExp.regmmatch_T)booleanregmmatch_tPair.getSecond().getFirst(); String pattern = booleanregmmatch_tPair.getSecond().getSecond(); - RegExp sp = (RegExp) booleanregmmatch_tPair.getSecond().getThird(); + RegExp sp = (RegExp)booleanregmmatch_tPair.getSecond().getThird(); /* the 'i' or 'I' flag overrules 'ignorecase' and 'smartcase' */ if (do_ic == 'i') { @@ -731,8 +763,8 @@ else if (do_ic == 'I') { resetShowSearchHighlight(); forceUpdateSearchHighlights(); - int start = ((IjVimEditor) editor).getEditor().getDocument().getLineStartOffset(line1); - int end = ((IjVimEditor) editor).getEditor().getDocument().getLineEndOffset(line2); + int start = ((IjVimEditor)editor).getEditor().getDocument().getLineStartOffset(line1); + int end = ((IjVimEditor)editor).getEditor().getDocument().getLineEndOffset(line2); if (logger.isDebugEnabled()) { logger.debug("search range=[" + start + "," + end + "]"); @@ -755,7 +787,6 @@ else if (do_ic == 'I') { firstMatch = false; } - String match 
= sp.vim_regsub_multi(regmatch, lnum, sub, 1, false); if (sub.charAt(0) == '\\' && sub.charAt(1) == '=') { String exprString = sub.toString().substring(2); @@ -764,22 +795,25 @@ else if (do_ic == 'I') { exceptions.add(new ExException("E15: Invalid expression: " + exprString)); expression = new SimpleExpression(new VimString("")); } - } else if (match == null) { + } + else if (match == null) { return false; } int line = lnum + regmatch.startpos[0].lnum; CharacterPosition startpos = new CharacterPosition(lnum + regmatch.startpos[0].lnum, regmatch.startpos[0].col); CharacterPosition endpos = new CharacterPosition(lnum + regmatch.endpos[0].lnum, regmatch.endpos[0].col); - int startoff = startpos.toOffset(((IjVimEditor) editor).getEditor()); - int endoff = endpos.toOffset(((IjVimEditor) editor).getEditor()); + int startoff = startpos.toOffset(((IjVimEditor)editor).getEditor()); + int endoff = endpos.toOffset(((IjVimEditor)editor).getEditor()); if (do_all || line != lastLine) { boolean doReplace = true; if (do_ask) { - RangeHighlighter hl = SearchHighlightsHelper.addSubstitutionConfirmationHighlight(((IjVimEditor) editor).getEditor(), startoff, endoff); - final ReplaceConfirmationChoice choice = confirmChoice(((IjVimEditor) editor).getEditor(), match, ((IjVimCaret) caret).getCaret(), startoff); - ((IjVimEditor) editor).getEditor().getMarkupModel().removeHighlighter(hl); + RangeHighlighter hl = + SearchHighlightsHelper.addSubstitutionConfirmationHighlight(((IjVimEditor)editor).getEditor(), startoff, + endoff); + final ReplaceConfirmationChoice choice = confirmChoice(((IjVimEditor)editor).getEditor(), match, ((IjVimCaret)caret).getCaret(), startoff); + ((IjVimEditor)editor).getEditor().getMarkupModel().removeHighlighter(hl); switch (choice) { case SUBSTITUTE_THIS: doReplace = true; @@ -802,25 +836,26 @@ else if (do_ic == 'I') { } } if (doReplace) { - SubmatchFunctionHandler.Companion.getInstance().setLatestMatch(((IjVimEditor) editor).getEditor().getDocument().getText(new 
com.intellij.openapi.util.TextRange(startoff, endoff))); + SubmatchFunctionHandler.Companion.getInstance().setLatestMatch( + ((IjVimEditor)editor).getEditor().getDocument().getText(new com.intellij.openapi.util.TextRange(startoff, endoff))); caret.moveToOffset(startoff); if (expression != null) { try { - match = expression - .evaluate(editor, injector.getExecutionContextManager().onEditor(editor, null), parent) - .toInsertableString(); - } catch (Exception e) { - exceptions.add((ExException) e); + match = + expression.evaluate(editor, injector.getExecutionContextManager().onEditor(editor, null), parent).toInsertableString(); + } + catch (Exception e) { + exceptions.add((ExException)e); match = ""; } } String finalMatch = match; - ApplicationManager.getApplication().runWriteAction(() -> ((IjVimEditor) editor).getEditor().getDocument().replaceString(startoff, endoff, - finalMatch)); + ApplicationManager.getApplication().runWriteAction( + () -> ((IjVimEditor)editor).getEditor().getDocument().replaceString(startoff, endoff, finalMatch)); lastMatch = startoff; int newend = startoff + match.length(); - newpos = CharacterPosition.Companion.fromOffset(((IjVimEditor) editor).getEditor(), newend); + newpos = CharacterPosition.Companion.fromOffset(((IjVimEditor)editor).getEditor(), newend); lnum += newpos.line - endpos.line; line2 += newpos.line - endpos.line; @@ -875,6 +910,10 @@ else if (do_ic == 'I') { @Override public void setLastSearchPattern(@Nullable String lastSearchPattern) { + if (globalIjOptions(injector).getUseNewRegex()) { + super.setLastSearchPattern(lastSearchPattern); + return; + } this.lastSearch = lastSearchPattern; if (showSearchHighlight) { resetIncsearchHighlights(); @@ -884,6 +923,10 @@ public void setLastSearchPattern(@Nullable String lastSearchPattern) { @Override public void setLastSubstitutePattern(@Nullable String lastSubstitutePattern) { + if (globalIjOptions(injector).getUseNewRegex()) { + super.setLastSubstitutePattern(lastSubstitutePattern); + 
return; + } this.lastSubstitute = lastSubstitutePattern; } @@ -893,6 +936,7 @@ public int processSearchRange(@NotNull VimEditor editor, int patternOffset, int startOffset, @NotNull Direction direction) { + if (globalIjOptions(injector).getUseNewRegex()) return super.processSearchRange(editor, pattern, patternOffset, startOffset, direction); return processSearchRange(((IjVimEditor) editor).getEditor(), pattern, patternOffset, startOffset, direction); } @@ -1015,6 +1059,7 @@ public Pair> search_regcomp(CharPointer */ @Override public @Nullable TextRange getNextSearchRange(@NotNull VimEditor editor, int count, boolean forwards) { + if (globalIjOptions(injector).getUseNewRegex()) return super.getNextSearchRange(editor, count, forwards); editor.removeSecondaryCarets(); TextRange current = findUnderCaret(editor); @@ -1046,17 +1091,10 @@ private boolean atEdgeOfGnRange(@NotNull TextRange nextRange, @NotNull Editor ed } } - @Override - @Nullable - public TextRange findUnderCaret(@NotNull VimEditor editor) { - final TextRange backSearch = searchBackward(editor, editor.primaryCaret().getOffset().getPoint() + 1, 1); - if (backSearch == null) return null; - return backSearch.contains(editor.primaryCaret().getOffset().getPoint()) ? backSearch : null; - } - @Override @Nullable public TextRange searchBackward(@NotNull VimEditor editor, int offset, int count) { + if (globalIjOptions(injector).getUseNewRegex()) return super.searchBackward(editor, offset, count); // Backward search returns wrongs end offset for some cases. 
That's why we should perform additional forward search final EnumSet searchOptions = EnumSet.of(SearchOptions.WRAP, SearchOptions.WHOLE_FILE, SearchOptions.BACKWARDS); final TextRange foundBackward = VimInjectorKt.getInjector().getSearchHelper().findPattern(editor, getLastUsedPattern(), offset, count, searchOptions); @@ -1074,7 +1112,12 @@ public TextRange searchBackward(@NotNull VimEditor editor, int offset, int count // // ******************************************************************************************************************* //region Search highlights + @Override public void clearSearchHighlight() { + if (globalIjOptions(injector).getUseNewRegex()) { + super.clearSearchHighlight(); + return; + } showSearchHighlight = false; updateSearchHighlights(); } @@ -1094,7 +1137,12 @@ private void updateSearchHighlights() { /** * Reset the search highlights to the last used pattern after highlighting incsearch results. */ + @Override public void resetIncsearchHighlights() { + if (globalIjOptions(injector).getUseNewRegex()) { + super.resetIncsearchHighlights(); + return; + } SearchHighlightsHelper.updateSearchHighlights(getLastUsedPattern(), lastIgnoreSmartCase, showSearchHighlight, true); } @@ -1103,9 +1151,13 @@ private void resetShowSearchHighlight() { } private void highlightSearchLines(@NotNull Editor editor, int startLine, int endLine) { + if (globalIjOptions(injector).getUseNewRegex()) { + super.highlightSearchLines(new IjVimEditor(editor), startLine, endLine); + return; + } final String pattern = getLastUsedPattern(); if (pattern != null) { - final List results = SearchHelper.findAll(editor, pattern, startLine, endLine, + final List results = injector.getSearchHelper().findAll(new IjVimEditor(editor), pattern, startLine, endLine, shouldIgnoreCase(pattern, lastIgnoreSmartCase)); SearchHighlightsHelper.highlightSearchResults(editor, pattern, results, -1); } @@ -1114,12 +1166,17 @@ private void highlightSearchLines(@NotNull Editor editor, int startLine, int 
end /** * Updates search highlights when the selected editor changes */ - public static void fileEditorManagerSelectionChangedCallback(@SuppressWarnings("unused") @NotNull FileEditorManagerEvent event) { + public void fileEditorManagerSelectionChangedCallback(@SuppressWarnings("unused") @NotNull FileEditorManagerEvent event) { + if (globalIjOptions(injector).getUseNewRegex()) { + super.updateSearchHighlights(false); + return; + } VimPlugin.getSearch().updateSearchHighlights(); } @Override public Integer findDecimalNumber(@NotNull String line) { + if (globalIjOptions(injector).getUseNewRegex()) return super.findDecimalNumber(line); Pair searchResult = SearchHelper.findNumberInText(line, 0, false, false, false); if (searchResult != null) { TextRange range = searchResult.component1(); @@ -1131,6 +1188,7 @@ public Integer findDecimalNumber(@NotNull String line) { @NotNull @Override public Direction getLastSearchDirection() { + if (globalIjOptions(injector).getUseNewRegex()) return super.getLastSearchDirection(); return lastDir; } @@ -1274,7 +1332,7 @@ private int findItOffset(@NotNull Editor editor, int startOffset, int count, Dir if (hasEndOffset) searchOptions.add(SearchOptions.WANT_ENDPOS); // Uses RE_LAST. 
We know this is always set before being called - TextRange range = SearchHelper.findPattern(editor, getLastUsedPattern(), startOffset, count, searchOptions); + TextRange range = injector.getSearchHelper().findPattern(new IjVimEditor(editor), getLastUsedPattern(), startOffset, count, searchOptions); if (range == null) { logger.warn("No range is found"); return -1; diff --git a/src/main/java/com/maddyhome/idea/vim/helper/SearchHelper.java b/src/main/java/com/maddyhome/idea/vim/helper/SearchHelper.java index 48d84cd348..651bb12fbb 100644 --- a/src/main/java/com/maddyhome/idea/vim/helper/SearchHelper.java +++ b/src/main/java/com/maddyhome/idea/vim/helper/SearchHelper.java @@ -23,6 +23,8 @@ import com.maddyhome.idea.vim.VimPlugin; import com.maddyhome.idea.vim.api.EngineEditorHelperKt; import com.maddyhome.idea.vim.api.VimEditor; +import com.maddyhome.idea.vim.regexp.*; +import com.maddyhome.idea.vim.regexp.match.VimMatchResult; import com.maddyhome.idea.vim.state.mode.Mode; import com.maddyhome.idea.vim.state.VimStateMachine; import com.maddyhome.idea.vim.common.CharacterPosition; @@ -30,8 +32,6 @@ import com.maddyhome.idea.vim.common.TextRange; import com.maddyhome.idea.vim.newapi.IjVimCaret; import com.maddyhome.idea.vim.newapi.IjVimEditor; -import com.maddyhome.idea.vim.regexp.CharPointer; -import com.maddyhome.idea.vim.regexp.RegExp; import kotlin.Pair; import org.jetbrains.annotations.Contract; import org.jetbrains.annotations.NotNull; @@ -42,10 +42,10 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import static com.maddyhome.idea.vim.api.VimInjectorKt.injector; -import static com.maddyhome.idea.vim.api.VimInjectorKt.options; +import static com.maddyhome.idea.vim.api.VimInjectorKt.*; import static com.maddyhome.idea.vim.helper.SearchHelperKtKt.checkInString; import static com.maddyhome.idea.vim.helper.SearchHelperKtKt.shouldIgnoreCase; +import static com.maddyhome.idea.vim.newapi.IjVimInjectorKt.globalIjOptions; /** * Helper methods for 
searching text @@ -59,6 +59,8 @@ public static String makeSearchPattern(String pattern, Boolean whole) { /** * Find text matching the given pattern. * + * @deprecated Use IjVimSearchHelper.findPattern instead + * *

<p>See search.c:searchit</p>

* * @param editor The editor to search in @@ -69,12 +71,13 @@ public static String makeSearchPattern(String pattern, Boolean whole) { * @return A TextRange representing the result, or null */ @Nullable + @Deprecated public static TextRange findPattern(@NotNull Editor editor, @Nullable String pattern, int startOffset, int count, EnumSet searchOptions) { - if (pattern == null || pattern.length() == 0) { + if (pattern == null || pattern.isEmpty()) { logger.warn("Pattern is null or empty. Cannot perform search"); return null; } @@ -347,6 +350,8 @@ else if (lnum <= 0) { /** * Find all occurrences of the pattern. * + * @deprecated Use IjVimSearchHelper.findAll instead + * * @param editor The editor to search in * @param pattern The pattern to search for * @param startLine The start line of the range to search for @@ -354,12 +359,33 @@ else if (lnum <= 0) { * @param ignoreCase Case sensitive or insensitive searching * @return A list of TextRange objects representing the results */ + @Deprecated public static @NotNull List findAll(@NotNull Editor editor, @NotNull String pattern, int startLine, int endLine, boolean ignoreCase) { final List results = Lists.newArrayList(); + + if (globalIjOptions(injector).getUseNewRegex()) { + final List options = new ArrayList<>(); + if (globalOptions(injector).getSmartcase()) options.add(VimRegexOptions.SMART_CASE); + if (globalOptions(injector).getIgnorecase()) options.add(VimRegexOptions.IGNORE_CASE); + VimEditor vimEditor = new IjVimEditor(editor); + try { + // TODO: we shouldn't care about the ignoreCase argument, and instead just look into the editor options. + // It would require a refactor, so for now prepend \c or \C to "force" ignoreCase + String newPattern = (ignoreCase ? "\\c" : "\\C") + pattern; + VimRegex regex = new VimRegex(newPattern); + List foundMatches = regex.findAll(vimEditor, vimEditor.getLineStartOffset(startLine), vimEditor.getLineEndOffset(endLine == -1 ? 
vimEditor.lineCount() - 1 : endLine) + 1, options); + for (VimMatchResult.Success match : foundMatches) results.add(match.getRange()); + return results; + } catch (VimRegexException e) { + injector.getMessages().showStatusBarMessage(vimEditor, e.getMessage()); + return results; + } + } + final int lineCount = new IjVimEditor(editor).lineCount(); final int actualEndLine = endLine == -1 ? lineCount - 1 : endLine; @@ -402,6 +428,10 @@ else if (lnum <= 0) { return results; } + /** + * @deprecated Use IjVimSearchHelper.findSection instead + */ + @Deprecated public static int findSection(@NotNull Editor editor, @NotNull Caret caret, char type, int dir, int count) { CharSequence chars = editor.getDocument().getCharsSequence(); int line = caret.getLogicalPosition().line + dir; @@ -428,6 +458,10 @@ public static int findSection(@NotNull Editor editor, @NotNull Caret caret, char return res; } + /** + * @deprecated Use IjVimSearchHelper.findUnmatchedBlock instead + */ + @Deprecated public static int findUnmatchedBlock(@NotNull Editor editor, @NotNull Caret caret, char type, int count) { CharSequence chars = editor.getDocument().getCharsSequence(); int pos = caret.getOffset(); @@ -447,6 +481,8 @@ public static int findUnmatchedBlock(@NotNull Editor editor, @NotNull Caret care /** * Find block enclosing the caret * + * @deprecated Use IjVimSearchHelper.findBlockRange instead + * * @param editor The editor to search in * @param caret The caret currently at * @param type The type of block, e.g. 
(, [, {, < @@ -798,6 +834,10 @@ private static int ignoreWhitespaceAtLineStart(CharSequence seq, int lineStart, } + /** + * @deprecated Use IjVimSearchHelper.findBlockTagRange instead + */ + @Deprecated public static @Nullable TextRange findBlockTagRange(@NotNull Editor editor, @NotNull Caret caret, int count, @@ -1330,6 +1370,10 @@ else if (hex && ((ch >= '0' && ch <= '9') || "abcdefABCDEF".indexOf(ch) >= 0)) { return new TextRange(start, end); } + /** + * @deprecated Use IjVimSearchHelper.findWordUnderCursor instead + */ + @Deprecated @Contract("_, _, _, _, _, _, _ -> new") public static @NotNull TextRange findWordUnderCursor(@NotNull Editor editor, @NotNull Caret caret, @@ -1515,10 +1559,16 @@ public static int findNextCharacterOnLine(@NotNull Editor editor, @NotNull Caret } } + /** + * @deprecated Use IjVimSearchHelper.findMethodStart instead + */ public static int findMethodStart(@NotNull Editor editor, @NotNull Caret caret, int count) { return PsiHelper.findMethodStart(editor, caret.getOffset(), count); } + /** + * @deprecated Use IjVimSearchHelper.findMethodEnd instead + */ public static int findMethodEnd(@NotNull Editor editor, @NotNull Caret caret, int count) { return PsiHelper.findMethodEnd(editor, caret.getOffset(), count); } diff --git a/src/main/java/com/maddyhome/idea/vim/helper/SearchHighlightsHelper.kt b/src/main/java/com/maddyhome/idea/vim/helper/SearchHighlightsHelper.kt index 52625b902a..2adbbef993 100644 --- a/src/main/java/com/maddyhome/idea/vim/helper/SearchHighlightsHelper.kt +++ b/src/main/java/com/maddyhome/idea/vim/helper/SearchHighlightsHelper.kt @@ -26,6 +26,7 @@ import com.maddyhome.idea.vim.api.injector import com.maddyhome.idea.vim.api.options import com.maddyhome.idea.vim.common.TextRange import com.maddyhome.idea.vim.ex.ranges.LineRange +import com.maddyhome.idea.vim.newapi.IjVimEditor import com.maddyhome.idea.vim.newapi.vim import org.jetbrains.annotations.Contract import java.awt.Color @@ -105,7 +106,7 @@ private fun 
updateSearchHighlights( val startLine = searchRange?.startLine ?: 0 val endLine = searchRange?.endLine ?: -1 val results = - SearchHelper.findAll(editor, pattern, startLine, endLine, shouldIgnoreCase(pattern, shouldIgnoreSmartCase)) + injector.searchHelper.findAll(IjVimEditor(editor), pattern, startLine, endLine, shouldIgnoreCase(pattern, shouldIgnoreSmartCase)) if (results.isNotEmpty()) { currentMatchOffset = findClosestMatch(editor, results, initialOffset, forwards) highlightSearchResults(editor, pattern, results, currentMatchOffset) @@ -119,7 +120,7 @@ private fun updateSearchHighlights( } if (shouldIgnoreSmartCase) searchOptions.add(SearchOptions.IGNORE_SMARTCASE) if (!forwards) searchOptions.add(SearchOptions.BACKWARDS) - val result = SearchHelper.findPattern(editor, pattern, initialOffset, 1, searchOptions) + val result = injector.searchHelper.findPattern(IjVimEditor(editor), pattern, initialOffset, 1, searchOptions) if (result != null) { currentMatchOffset = result.startOffset val results = listOf(result) diff --git a/src/main/java/com/maddyhome/idea/vim/listener/VimListenerManager.kt b/src/main/java/com/maddyhome/idea/vim/listener/VimListenerManager.kt index 154f36606e..560cae871d 100644 --- a/src/main/java/com/maddyhome/idea/vim/listener/VimListenerManager.kt +++ b/src/main/java/com/maddyhome/idea/vim/listener/VimListenerManager.kt @@ -307,7 +307,7 @@ internal object VimListenerManager { if (VimPlugin.isNotEnabled()) return MotionGroup.fileEditorManagerSelectionChangedCallback(event) FileGroup.fileEditorManagerSelectionChangedCallback(event) - SearchGroup.fileEditorManagerSelectionChangedCallback(event) + VimPlugin.getSearch().fileEditorManagerSelectionChangedCallback(event) OptionGroup.fileEditorManagerSelectionChangedCallback(event) } } diff --git a/src/main/java/com/maddyhome/idea/vim/newapi/IjVimSearchGroup.kt b/src/main/java/com/maddyhome/idea/vim/newapi/IjVimSearchGroup.kt new file mode 100644 index 0000000000..66be2c7ae7 --- /dev/null +++ 
b/src/main/java/com/maddyhome/idea/vim/newapi/IjVimSearchGroup.kt @@ -0,0 +1,181 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.newapi + +import com.intellij.openapi.application.ApplicationManager +import com.intellij.openapi.util.Ref +import com.maddyhome.idea.vim.VimPlugin +import com.maddyhome.idea.vim.api.Options +import com.maddyhome.idea.vim.api.VimCaret +import com.maddyhome.idea.vim.api.VimEditor +import com.maddyhome.idea.vim.api.VimSearchGroupBase +import com.maddyhome.idea.vim.api.globalOptions +import com.maddyhome.idea.vim.api.injector +import com.maddyhome.idea.vim.helper.MessageHelper +import com.maddyhome.idea.vim.helper.TestInputModel.Companion.getInstance +import com.maddyhome.idea.vim.helper.addSubstitutionConfirmationHighlight +import com.maddyhome.idea.vim.helper.highlightSearchResults +import com.maddyhome.idea.vim.helper.isCloseKeyStroke +import com.maddyhome.idea.vim.helper.shouldIgnoreCase +import com.maddyhome.idea.vim.helper.updateSearchHighlights +import com.maddyhome.idea.vim.options.GlobalOptionChangeListener +import com.maddyhome.idea.vim.ui.ModalEntry +import com.maddyhome.idea.vim.vimscript.model.expressions.Expression +import com.maddyhome.idea.vim.vimscript.model.functions.handlers.SubmatchFunctionHandler +import com.maddyhome.idea.vim.vimscript.parser.VimscriptParser.parseExpression +import org.jetbrains.annotations.TestOnly +import javax.swing.KeyStroke + +public open class IjVimSearchGroup : VimSearchGroupBase() { + + init { + // TODO: Investigate migrating these listeners to use the effective value change listener + // This would allow us to update the editor we're told to update, rather than looping over all projects and updating + // the highlights in that project's current document's open editors (see VIM-2779). 
+ // However, we probably only want to update the editors associated with the current document, so maybe the whole + // code needs to be reworked. We're currently using the same update code for changes in the search term as well as + // changes in the search options. + VimPlugin.getOptionGroup().addGlobalOptionChangeListener(Options.hlsearch) { + resetSearchHighlight() + updateSearchHighlights(true) + } + + val updateHighlightsIfVisible = GlobalOptionChangeListener { + if (showSearchHighlight) { + updateSearchHighlights(true) + } + } + VimPlugin.getOptionGroup().addGlobalOptionChangeListener(Options.ignorecase, updateHighlightsIfVisible) + VimPlugin.getOptionGroup().addGlobalOptionChangeListener(Options.smartcase, updateHighlightsIfVisible) + } + + private var showSearchHighlight: Boolean = injector.globalOptions().hlsearch + + override fun highlightSearchLines( + editor: VimEditor, + startLine: Int, + endLine: Int, + ) { + val pattern = getLastUsedPattern() + if (pattern != null) { + val results = injector.searchHelper.findAll( + editor, pattern, startLine, endLine, + shouldIgnoreCase(pattern, lastIgnoreSmartCase) + ) + highlightSearchResults(editor.ij, pattern, results, -1) + } + } + + override fun updateSearchHighlights(force: Boolean) { + updateSearchHighlights(getLastUsedPattern(), lastIgnoreSmartCase, showSearchHighlight, force) + } + + override fun resetIncsearchHighlights() { + updateSearchHighlights(getLastUsedPattern(), lastIgnoreSmartCase, showSearchHighlight, true) + } + + override fun confirmChoice( + editor: VimEditor, + match: String, + caret: VimCaret, + startOffset: Int, + ): ReplaceConfirmationChoice { + val result: Ref = Ref.create(ReplaceConfirmationChoice.QUIT) + val keyStrokeProcessor: Function1 = label@{ key: KeyStroke -> + val choice: ReplaceConfirmationChoice + val c = key.keyChar + choice = if (key.isCloseKeyStroke() || c == 'q') { + ReplaceConfirmationChoice.QUIT + } else if (c == 'y') { + ReplaceConfirmationChoice.SUBSTITUTE_THIS + } 
else if (c == 'l') { + ReplaceConfirmationChoice.SUBSTITUTE_LAST + } else if (c == 'n') { + ReplaceConfirmationChoice.SKIP + } else if (c == 'a') { + ReplaceConfirmationChoice.SUBSTITUTE_ALL + } else { + return@label true + } + // TODO: Handle <C-E> and <C-Y> + result.set(choice) + false + } + if (ApplicationManager.getApplication().isUnitTestMode) { + caret.moveToOffset(startOffset) + val inputModel = getInstance(editor.ij) + var key = inputModel.nextKeyStroke() + while (key != null) { + if (!keyStrokeProcessor.invoke(key)) { + break + } + key = inputModel.nextKeyStroke() + } + } else { + // XXX: The Ex entry panel is used only for UI here, its logic might be inappropriate for this method + val exEntryPanel: com.maddyhome.idea.vim.ui.ex.ExEntryPanel = + com.maddyhome.idea.vim.ui.ex.ExEntryPanel.getInstanceWithoutShortcuts() + val context = injector.executionContextManager.onEditor(editor, null) + exEntryPanel.activate( + editor.ij, + (context as IjEditorExecutionContext).context, + MessageHelper.message("replace.with.0", match), + "", + 1 + ) + caret.moveToOffset(startOffset) + ModalEntry.activate(editor, keyStrokeProcessor) + exEntryPanel.deactivate(true, false) + } + return result.get() + } + + override fun parseVimScriptExpression(expressionString: String): Expression? 
{ + return parseExpression(expressionString) + } + + override fun addSubstitutionConfirmationHighlight(editor: VimEditor, startOffset: Int, endOffset: Int) { + val hl = addSubstitutionConfirmationHighlight( + (editor as IjVimEditor).editor, + startOffset, + endOffset + ) + editor.editor.markupModel.removeHighlighter(hl) + } + + override fun setLatestMatch(match: String) { + SubmatchFunctionHandler.getInstance().latestMatch = match + } + + override fun replaceString( + editor: VimEditor, + startOffset: Int, + endOffset: Int, + newString: String, + ) { + ApplicationManager.getApplication().runWriteAction { + (editor as IjVimEditor).editor.document.replaceString(startOffset, endOffset, newString) + } + } + + @TestOnly + override fun resetState() { + super.resetState() + showSearchHighlight = injector.globalOptions().hlsearch + } + + override fun resetSearchHighlight() { + showSearchHighlight = injector.globalOptions().hlsearch + } + + override fun clearSearchHighlight() { + showSearchHighlight = false + updateSearchHighlights(false) + } +} \ No newline at end of file diff --git a/src/main/java/com/maddyhome/idea/vim/newapi/IjVimSearchHelper.kt b/src/main/java/com/maddyhome/idea/vim/newapi/IjVimSearchHelper.kt index 741fe8953f..3cd3a9fe13 100644 --- a/src/main/java/com/maddyhome/idea/vim/newapi/IjVimSearchHelper.kt +++ b/src/main/java/com/maddyhome/idea/vim/newapi/IjVimSearchHelper.kt @@ -9,49 +9,144 @@ package com.maddyhome.idea.vim.newapi import com.intellij.openapi.components.Service +import com.intellij.openapi.diagnostic.Logger import com.maddyhome.idea.vim.api.ImmutableVimCaret import com.maddyhome.idea.vim.api.VimEditor import com.maddyhome.idea.vim.api.VimSearchHelperBase +import com.maddyhome.idea.vim.api.anyNonWhitespace +import com.maddyhome.idea.vim.api.getLineEndOffset +import com.maddyhome.idea.vim.api.getLineStartForOffset +import com.maddyhome.idea.vim.api.injector +import com.maddyhome.idea.vim.api.normalizeOffset +import 
com.maddyhome.idea.vim.common.Direction import com.maddyhome.idea.vim.common.TextRange +import com.maddyhome.idea.vim.helper.CharacterHelper +import com.maddyhome.idea.vim.helper.CharacterHelper.charType +import com.maddyhome.idea.vim.helper.PsiHelper import com.maddyhome.idea.vim.helper.SearchHelper import com.maddyhome.idea.vim.helper.SearchOptions +import com.maddyhome.idea.vim.helper.checkInString +import com.maddyhome.idea.vim.helper.fileSize +import com.maddyhome.idea.vim.state.VimStateMachine.Companion.getInstance +import com.maddyhome.idea.vim.state.mode.Mode.VISUAL import java.util.* +import java.util.function.Function +import java.util.regex.Pattern +import kotlin.math.abs +import kotlin.math.max @Service internal class IjVimSearchHelper : VimSearchHelperBase() { - override fun findSection(editor: VimEditor, caret: ImmutableVimCaret, type: Char, dir: Int, count: Int): Int { - return SearchHelper.findSection( - (editor as IjVimEditor).editor, - (caret as IjVimCaret).caret, - type, - dir, - count, - ) + + companion object { + private const val BLOCK_CHARS = "{}()[]<>" + private val logger = Logger.getInstance(IjVimSearchHelper::class.java.name) + } + override fun findSection( + editor: VimEditor, + caret: ImmutableVimCaret, + type: Char, + direction: Int, + count: Int, + ) + : Int { + val documentText: CharSequence = editor.ij.document.charsSequence + var currentLine: Int = caret.ij.logicalPosition.line + direction + var resultOffset = -1 + var remainingTargets = count + + while (currentLine in 1 until editor.lineCount() && remainingTargets > 0) { + val lineStartOffset = editor.getLineStartOffset(currentLine) + if (lineStartOffset < documentText.length) { + val currentChar = documentText[lineStartOffset] + if (currentChar == type || currentChar == '\u000C') { + resultOffset = lineStartOffset + remainingTargets-- + } + } + currentLine += direction + } + + if (resultOffset == -1) { + resultOffset = if (direction < 0) 0 else documentText.length - 1 + } + + 
return resultOffset } override fun findMethodEnd(editor: VimEditor, caret: ImmutableVimCaret, count: Int): Int { - return SearchHelper.findMethodEnd( - (editor as IjVimEditor).editor, - (caret as IjVimCaret).caret, - count, - ) + return PsiHelper.findMethodEnd(editor.ij, caret.ij.offset, count) } override fun findMethodStart(editor: VimEditor, caret: ImmutableVimCaret, count: Int): Int { - return SearchHelper.findMethodStart( - (editor as IjVimEditor).editor, - (caret as IjVimCaret).caret, - count, - ) + return PsiHelper.findMethodStart(editor.ij, caret.ij.offset, count) } override fun findUnmatchedBlock(editor: VimEditor, caret: ImmutableVimCaret, type: Char, count: Int): Int { - return SearchHelper.findUnmatchedBlock( - (editor as IjVimEditor).editor, - (caret as IjVimCaret).caret, - type, - count, - ) + val chars: CharSequence = editor.ij.document.charsSequence + var pos: Int = caret.ij.offset + val loc = BLOCK_CHARS.indexOf(type) + // What direction should we go now (-1 is backward, 1 is forward) + val dir = if (loc % 2 == 0) Direction.BACKWARDS else Direction.FORWARDS + // Which character did we find and which should we now search for + val match = BLOCK_CHARS[loc] + val found = BLOCK_CHARS[loc - dir.toInt()] + + if (pos < chars.length && chars[pos] == type) { + pos += dir.toInt() + } + return findBlockLocation(chars, found, match, dir, pos, count) + } + + private fun findBlockLocation( + chars: CharSequence, + found: Char, + match: Char, + dir: Direction, + pos: Int, + cnt: Int, + ): Int { + var position = pos + var count = cnt + var res = -1 + val initialPos = position + val initialInString = checkInString(chars, position, true) + val inCheckPosF = + Function { x: Int -> if (dir === Direction.BACKWARDS && x > 0) x - 1 else x + 1 } + val inCheckPos = inCheckPosF.apply(position) + var inString = checkInString(chars, inCheckPos, true) + var inChar = checkInString(chars, inCheckPos, false) + var stack = 0 + // Search to start or end of file, as appropriate + val 
charsToSearch: Set = HashSet(listOf('\'', '"', '\n', match, found)) + while (position >= 0 && position < chars.length && count > 0) { + val (c, second) = SearchHelper.findPositionOfFirstCharacter(chars, position, charsToSearch, true, dir) ?: return -1 + position = second + // If we found a match and we're not in a string... + if (c == match && (!inString) && !inChar) { + // We found our match + if (stack == 0) { + res = position + count-- + } else { + stack-- + } + } else if (c == '\n') { + inString = false + inChar = false + } else if (position != initialPos) { + // We found another character like our original - belongs to another pair + if (!inString && !inChar && c == found) { + stack++ + } else if (!inChar) { + inString = checkInString(chars, inCheckPosF.apply(position), true) + } else if (!inString) { + inChar = checkInString(chars, inCheckPosF.apply(position), false) + } + } + position += dir.toInt() + } + return res } override fun findPattern( @@ -61,11 +156,44 @@ internal class IjVimSearchHelper : VimSearchHelperBase() { count: Int, searchOptions: EnumSet?, ): TextRange? 
{ - return SearchHelper.findPattern(editor.ij, pattern, startOffset, count, searchOptions) + return if (injector.globalIjOptions().useNewRegex) super.findPattern(editor, pattern, startOffset, count, searchOptions) + else SearchHelper.findPattern(editor.ij, pattern, startOffset, count, searchOptions) + } + + override fun findAll( + editor: VimEditor, + pattern: String, + startLine: Int, + endLine: Int, + ignoreCase: Boolean, + ): List { + return if (injector.globalIjOptions().useNewRegex) super.findAll(editor, pattern, startLine, endLine, ignoreCase) + else SearchHelper.findAll(editor.ij, pattern, startLine, endLine, ignoreCase) } override fun findNextCharacterOnLine(editor: VimEditor, caret: ImmutableVimCaret, count: Int, ch: Char): Int { - return SearchHelper.findNextCharacterOnLine(editor.ij, caret.ij, count, ch) + val line: Int = caret.ij.logicalPosition.line + val start = editor.getLineStartOffset(line) + val end = editor.getLineEndOffset(line, true) + val chars: CharSequence = editor.ij.document.charsSequence + var found = 0 + val step = if (count >= 0) 1 else -1 + var pos: Int = caret.ij.offset + step + while (pos in start until end && pos < chars.length) { + if (chars[pos] == ch) { + found++ + if (found == abs(count)) { + break + } + } + pos += step + } + + return if (found == abs(count)) { + pos + } else { + -1 + } } override fun findWordUnderCursor( @@ -77,11 +205,347 @@ internal class IjVimSearchHelper : VimSearchHelperBase() { isBig: Boolean, hasSelection: Boolean, ): TextRange { - return SearchHelper.findWordUnderCursor(editor.ij, caret.ij, count, dir, isOuter, isBig, hasSelection) + if (logger.isDebugEnabled) { + logger.debug("count=$count") + logger.debug("dir=$dir") + logger.debug("isOuter=$isOuter") + logger.debug("isBig=$isBig") + logger.debug("hasSelection=$hasSelection") + } + + val chars: CharSequence = editor.ij.document.charsSequence + //int min = EditorHelper.getLineStartOffset(editor, EditorHelper.getCurrentLogicalLine(editor)); + //int max 
= EditorHelper.getLineEndOffset(editor, EditorHelper.getCurrentLogicalLine(editor), true); + val min = 0 + val max: Int = editor.ij.fileSize + if (max == 0) return TextRange(0, 0) + + if (logger.isDebugEnabled) { + logger.debug("min=$min") + logger.debug("max=$max") + } + + val pos: Int = caret.ij.offset + if (chars.length <= pos) return TextRange(chars.length - 1, chars.length - 1) + + val startSpace = charType(editor, chars[pos], isBig) === CharacterHelper.CharacterType.WHITESPACE + // Find word start + val onWordStart = pos == min || + charType(editor, chars[pos - 1], isBig) !== + charType(editor, chars[pos], isBig) + var start = pos + + if (logger.isDebugEnabled) { + logger.debug("pos=$pos") + logger.debug("onWordStart=$onWordStart") + } + + if (!onWordStart && !(startSpace && isOuter) || hasSelection || count > 1 && dir == -1) { + start = if (dir == 1) { + findNextWord(editor, pos, -1, isBig, !isOuter) + } else { + findNextWord( + editor, + pos, + -(count - if (onWordStart && !hasSelection) 1 else 0), + isBig, + !isOuter + ) + } + start = editor.normalizeOffset(start, false) + } + + if (logger.isDebugEnabled) logger.debug("start=$start") + + // Find word end + + // Find word end + val onWordEnd = pos >= max - 1 || + charType(editor, chars[pos + 1], isBig) !== + charType(editor, chars[pos], isBig) + + if (logger.isDebugEnabled) logger.debug("onWordEnd=$onWordEnd") + + var end = pos + if (!onWordEnd || hasSelection || count > 1 && dir == 1 || startSpace && isOuter) { + end = if (dir == 1) { + val c = count - if (onWordEnd && !hasSelection && (!(startSpace && isOuter) || startSpace && !isOuter)) 1 else 0 + findNextWordEnd(editor, pos, c, isBig, !isOuter) + } else { + findNextWordEnd(editor, pos, 1, isBig, !isOuter) + } + } + + if (logger.isDebugEnabled) logger.debug("end=$end") + + var goBack = startSpace && !hasSelection || !startSpace && hasSelection && !onWordStart + if (dir == 1 && isOuter) { + var firstEnd = end + if (count > 1) { + firstEnd = 
findNextWordEnd(editor, pos, 1, isBig, false) + } + if (firstEnd < max - 1) { + if (charType(editor, chars[firstEnd + 1], false) !== CharacterHelper.CharacterType.WHITESPACE) { + goBack = true + } + } + } + if (dir == -1 && isOuter && startSpace) { + if (pos > min) { + if (charType(editor, chars[pos - 1], false) !== CharacterHelper.CharacterType.WHITESPACE) { + goBack = true + } + } + } + + var goForward = dir == 1 && isOuter && (!startSpace && !onWordEnd || startSpace && onWordEnd && hasSelection) + if (!goForward && dir == 1 && isOuter) { + var firstEnd = end + if (count > 1) { + firstEnd = findNextWordEnd(editor, pos, 1, isBig, false) + } + if (firstEnd < max - 1) { + if (charType(editor, chars[firstEnd + 1], false) !== CharacterHelper.CharacterType.WHITESPACE) { + goForward = true + } + } + } + if (!goForward && dir == 1 && isOuter && !startSpace && !hasSelection) { + if (end < max - 1) { + if (charType(editor, chars[end + 1], !isBig) !== + charType(editor, chars[end], !isBig) + ) { + goForward = true + } + } + } + + if (logger.isDebugEnabled) { + logger.debug("goBack=$goBack") + logger.debug("goForward=$goForward") + } + + if (goForward) { + if (editor.anyNonWhitespace(end, 1)) { + while (end + 1 < max && + charType(editor, chars[end + 1], false) === CharacterHelper.CharacterType.WHITESPACE + ) { + end++ + } + } + } + if (goBack) { + if (editor.anyNonWhitespace(start, -1)) { + while (start > min && + charType(editor, chars[start - 1], false) === CharacterHelper.CharacterType.WHITESPACE + ) { + start-- + } + } + } + + if (logger.isDebugEnabled) { + logger.debug("start=$start") + logger.debug("end=$end") + } + + // End offset is exclusive + return TextRange(start, end + 1) } override fun findBlockTagRange(editor: VimEditor, caret: ImmutableVimCaret, count: Int, isOuter: Boolean): TextRange? 
{ - return SearchHelper.findBlockTagRange(editor.ij, caret.ij, count, isOuter) + var counter = count + var isOuterVariable = isOuter + val position: Int = caret.ij.offset + val sequence: CharSequence = editor.ij.document.charsSequence + + val selectionStart: Int = caret.ij.selectionStart + val selectionEnd: Int = caret.ij.selectionEnd + + val isRangeSelection = selectionEnd - selectionStart > 1 + + var searchStartPosition: Int + searchStartPosition = if (!isRangeSelection) { + val line: Int = caret.ij.logicalPosition.line + val lineBegin: Int = editor.ij.document.getLineStartOffset(line) + ignoreWhitespaceAtLineStart(sequence, lineBegin, position) + } else { + selectionEnd + } + + if (isInHTMLTag(sequence, searchStartPosition, false)) { + // caret is inside opening tag. Move to closing '>'. + while (searchStartPosition < sequence.length && sequence[searchStartPosition] != '>') { + searchStartPosition++ + } + } else if (isInHTMLTag(sequence, searchStartPosition, true)) { + // caret is inside closing tag. Move to starting '<'. 
+ while (searchStartPosition > 0 && sequence[searchStartPosition] != '<') { + searchStartPosition-- + } + } + + while (true) { + val (closingTagTextRange, tagName) = findUnmatchedClosingTag(sequence, searchStartPosition, counter) + ?: return null + val openingTag = findUnmatchedOpeningTag(sequence, closingTagTextRange.startOffset, tagName) + ?: return null + if (isRangeSelection && openingTag.endOffset - 1 >= selectionStart) { + // If there was already some text selected and the new selection would not extend further, we try again + searchStartPosition = closingTagTextRange.endOffset + counter = 1 + continue + } + var selectionEndWithoutNewline = selectionEnd + while (selectionEndWithoutNewline < sequence.length && sequence[selectionEndWithoutNewline] == '\n') { + selectionEndWithoutNewline++ + } + val mode = getInstance(editor).mode + if (mode is VISUAL) { + if (closingTagTextRange.startOffset == selectionEndWithoutNewline && + openingTag.endOffset == selectionStart + ) { + // Special case: if the inner tag is already selected we should act like isOuter is active + // Note that we need to ignore newlines, because their selection is lost between multiple "it" invocations + isOuterVariable = true + } else if (openingTag.endOffset == closingTagTextRange.startOffset && + selectionStart == openingTag.endOffset + ) { + // Special case: for an empty tag pair (e.g. <a></a>) the whole tag is selected if the caret is in the middle. 
+ isOuterVariable = true + } + } + return if (isOuterVariable) { + TextRange(openingTag.startOffset, closingTagTextRange.endOffset) + } else { + TextRange(openingTag.endOffset, closingTagTextRange.startOffset) + } + } + } + + /** + * Returns the new position, which ignores whitespace at the beginning of the line + */ + private fun ignoreWhitespaceAtLineStart(seq: CharSequence, lineStart: Int, pos: Int): Int { + var position = pos + if (seq.subSequence(lineStart, position).chars().allMatch { codePoint: Int -> + Character.isWhitespace( + codePoint + ) + }) { + while (position < seq.length && seq[position] != '\n' && Character.isWhitespace(seq[position])) { + position++ + } + } + return position + } + + /** + * Returns true if there is an HTML tag at the given position. Ignores tags with a trailing slash like <img/>. + */ + private fun isInHTMLTag(sequence: CharSequence, position: Int, isEndtag: Boolean): Boolean { + var openingBracket = -1 + run { + var i = position + while (i >= 0 && i < sequence.length) { + if (sequence[i] == '<') { + openingBracket = i + break + } + if (sequence[i] == '>' && i != position) { + return false + } + i-- + } + } + if (openingBracket == -1) { + return false + } + val hasSlashAfterOpening = openingBracket + 1 < sequence.length && sequence[openingBracket + 1] == '/' + if (isEndtag && !hasSlashAfterOpening || !isEndtag && hasSlashAfterOpening) { + return false + } + var closingBracket = -1 + for (i in openingBracket until sequence.length) { + if (sequence[i] == '>') { + closingBracket = i + break + } + } + return closingBracket != -1 && sequence[closingBracket - 1] != '/' + } + + private fun findUnmatchedOpeningTag( + sequence: CharSequence, + position: Int, + tagName: String, + ): TextRange? { + val quotedTagName = Pattern.quote(tagName) + val patternString = ("(</%s>)" // match closing tags + + + "|(<%s" // or opening tags starting with tagName + + + "(\\s([^>]*" // After at least one whitespace there might be additional text in the tag. E.g. <html lang="en">
+ + "[^/])?)?>)") // Slash is not allowed as last character (this would be a self closing tag). + val tagPattern = + Pattern.compile(String.format(patternString, quotedTagName, quotedTagName), Pattern.CASE_INSENSITIVE) + val matcher = tagPattern.matcher(sequence.subSequence(0, position + 1)) + val openTags: Deque<TextRange> = ArrayDeque() + while (matcher.find()) { + val match = TextRange(matcher.start(), matcher.end()) + if (sequence[matcher.start() + 1] == '/') { + if (!openTags.isEmpty()) { + openTags.pop() + } + } else { + openTags.push(match) + } + } + return if (openTags.isEmpty()) { + null + } else { + openTags.pop() + } + } + + private fun findUnmatchedClosingTag( + sequence: CharSequence, + position: Int, + count: Int, + ): Pair<TextRange, String>? { + // The tag name may contain any characters except slashes, whitespace and '>' + var counter = count + val tagNamePattern = "([^/\\s>]+)" + // An opening tag consists of '<' followed by a tag name, optionally some additional text after whitespace and a '>' + val openingTagPattern = String.format("<%s(?:\\s[^>]*)?>", tagNamePattern) + val closingTagPattern = String.format("</%s>", tagNamePattern) + val tagPattern = Pattern.compile(String.format("(?:%s)|(?:%s)", openingTagPattern, closingTagPattern)) + val matcher = tagPattern.matcher(sequence.subSequence(position, sequence.length)) + val openTags: Deque<String> = ArrayDeque() + while (matcher.find()) { + val isClosingTag = matcher.group(1) == null + if (isClosingTag) { + val tagName = matcher.group(2) + // Ignore unmatched open tags. Either the file is malformed or it might be a tag like <br>
that does not need to be closed. + while (!openTags.isEmpty() && !openTags.peek().equals(tagName, ignoreCase = true)) { + openTags.pop() + } + if (openTags.isEmpty()) { + if (counter <= 1) { + return Pair(TextRange(position + matcher.start(), position + matcher.end()), tagName) + } else { + counter-- + } + } else { + openTags.pop() + } + } else { + val tagName = matcher.group(1) + openTags.push(tagName) + } + } + return null } override fun findBlockRange( @@ -91,6 +555,138 @@ internal class IjVimSearchHelper : VimSearchHelperBase() { count: Int, isOuter: Boolean, ): TextRange? { - return SearchHelper.findBlockRange(editor.ij, caret.ij, type, count, isOuter) + val chars: CharSequence = editor.ij.document.charsSequence + var pos: Int = caret.ij.offset + var start: Int = caret.ij.selectionStart + var end: Int = caret.ij.selectionEnd + + val loc = BLOCK_CHARS.indexOf(type) + val close = BLOCK_CHARS[loc + 1] + + // extend the range for blank line after type and before close, as they are excluded when inner match + if (!isOuter) { + if (start > 1 && chars[start - 2] == type && chars[start - 1] == '\n') { + start-- + } + if (end < chars.length && chars[end] == '\n') { + var isSingleLineAllWhiteSpaceUntilClose = false + var countWhiteSpaceCharacter = 1 + while (end + countWhiteSpaceCharacter < chars.length) { + if (Character.isWhitespace(chars[end + countWhiteSpaceCharacter]) && + chars[end + countWhiteSpaceCharacter] != '\n' + ) { + countWhiteSpaceCharacter++ + continue + } + if (chars[end + countWhiteSpaceCharacter] == close) { + isSingleLineAllWhiteSpaceUntilClose = true + } + break + } + if (isSingleLineAllWhiteSpaceUntilClose) { + end += countWhiteSpaceCharacter + } + } + } + + var rangeSelection = end - start > 1 + if (rangeSelection && start == 0) // early return not only for optimization + { + return null // but also not to break the interval semantic on this edge case (see below) + } + + /* In case of successive inner selection. 
We want to break out of + * the block delimiter of the current inner selection. + * In other terms, for the rest of the algorithm, a previous inner selection of a block + * if equivalent to an outer one. */ + + /* In case of successive inner selection. We want to break out of + * the block delimiter of the current inner selection. + * In other terms, for the rest of the algorithm, a previous inner selection of a block + * if equivalent to an outer one. */if (!isOuter && start - 1 >= 0 && type == chars[start - 1] && end < chars.length && close == chars[end]) { + start -= 1 + pos = start + rangeSelection = true + } + + /* when one char is selected, we want to find the enclosing block of (start,end] + * although when a range of characters is selected, we want the enclosing block of [start, end] + * shifting the position allow to express which kind of interval we work on */ + + /* when one char is selected, we want to find the enclosing block of (start,end] + * although when a range of characters is selected, we want the enclosing block of [start, end] + * shifting the position allow to express which kind of interval we work on */if (rangeSelection) pos = + max(0.0, (start - 1).toDouble()).toInt() + + val initialPosIsInString = checkInString(chars, pos, true) + + var bstart = -1 + var bend = -1 + + var startPosInStringFound = false + + if (initialPosIsInString) { + val quoteRange = injector.searchHelper + .findBlockQuoteInLineRange(editor, caret, '"', false) + if (quoteRange != null) { + val startOffset = quoteRange.startOffset + val endOffset = quoteRange.endOffset + val subSequence = chars.subSequence(startOffset, endOffset) + val inQuotePos = pos - startOffset + var inQuoteStart = + findBlockLocation(subSequence, close, type, Direction.BACKWARDS, inQuotePos, count) + if (inQuoteStart == -1) { + inQuoteStart = + findBlockLocation(subSequence, close, type, Direction.FORWARDS, inQuotePos, count) + } + if (inQuoteStart != -1) { + startPosInStringFound = true + val 
inQuoteEnd = + findBlockLocation(subSequence, type, close, Direction.FORWARDS, inQuoteStart, 1) + if (inQuoteEnd != -1) { + bstart = inQuoteStart + startOffset + bend = inQuoteEnd + startOffset + } + } + } + } + + if (!startPosInStringFound) { + bstart = findBlockLocation(chars, close, type, Direction.BACKWARDS, pos, count) + if (bstart == -1) { + bstart = findBlockLocation(chars, close, type, Direction.FORWARDS, pos, count) + } + if (bstart != -1) { + bend = findBlockLocation(chars, type, close, Direction.FORWARDS, bstart, 1) + } + } + + if (bstart == -1 || bend == -1) { + return null + } + + if (!isOuter) { + bstart++ + // exclude first line break after start for inner match + if (chars[bstart] == '\n') { + bstart++ + } + val o = editor.getLineStartForOffset(bend) + var allWhite = true + for (i in o until bend) { + if (!Character.isWhitespace(chars[i])) { + allWhite = false + break + } + } + if (allWhite) { + bend = o - 2 + } else { + bend-- + } + } + + // End offset exclusive + return TextRange(bstart, bend + 1) } } diff --git a/src/main/java/com/maddyhome/idea/vim/regexp/RegExp.kt b/src/main/java/com/maddyhome/idea/vim/regexp/RegExp.kt index f55d3cc84b..a1a18d3953 100644 --- a/src/main/java/com/maddyhome/idea/vim/regexp/RegExp.kt +++ b/src/main/java/com/maddyhome/idea/vim/regexp/RegExp.kt @@ -8,6 +8,7 @@ import com.maddyhome.idea.vim.helper.Msg import org.jetbrains.annotations.NonNls import java.util.* +@Deprecated("Please use VimRegex class instead") internal class RegExp { /* * The first byte of the regexp internal "program" is actually this magic diff --git a/src/main/java/com/maddyhome/idea/vim/vimscript/model/commands/GlobalCommand.kt b/src/main/java/com/maddyhome/idea/vim/vimscript/model/commands/GlobalCommand.kt index 058c16e399..6d6b5ea1b3 100644 --- a/src/main/java/com/maddyhome/idea/vim/vimscript/model/commands/GlobalCommand.kt +++ b/src/main/java/com/maddyhome/idea/vim/vimscript/model/commands/GlobalCommand.kt @@ -11,6 +11,7 @@ package 
com.maddyhome.idea.vim.vimscript.model.commands import com.intellij.openapi.editor.RangeMarker import com.intellij.vim.annotations.ExCommand import com.maddyhome.idea.vim.VimPlugin +import com.maddyhome.idea.vim.api.BufferPosition import com.maddyhome.idea.vim.api.ExecutionContext import com.maddyhome.idea.vim.api.VimEditor import com.maddyhome.idea.vim.api.getLineStartForOffset @@ -18,15 +19,20 @@ import com.maddyhome.idea.vim.api.injector import com.maddyhome.idea.vim.command.OperatorArguments import com.maddyhome.idea.vim.ex.ranges.LineRange import com.maddyhome.idea.vim.ex.ranges.Ranges +import com.maddyhome.idea.vim.group.SearchGroup import com.maddyhome.idea.vim.group.SearchGroup.RE_BOTH import com.maddyhome.idea.vim.group.SearchGroup.RE_LAST import com.maddyhome.idea.vim.group.SearchGroup.RE_SEARCH import com.maddyhome.idea.vim.group.SearchGroup.RE_SUBST import com.maddyhome.idea.vim.helper.MessageHelper.message import com.maddyhome.idea.vim.helper.Msg +import com.maddyhome.idea.vim.newapi.globalIjOptions import com.maddyhome.idea.vim.newapi.ij import com.maddyhome.idea.vim.regexp.CharPointer import com.maddyhome.idea.vim.regexp.RegExp +import com.maddyhome.idea.vim.regexp.VimRegex +import com.maddyhome.idea.vim.regexp.VimRegexException +import com.maddyhome.idea.vim.regexp.match.VimMatchResult import com.maddyhome.idea.vim.vimscript.model.ExecutionResult /** @@ -99,63 +105,110 @@ internal data class GlobalCommand(val ranges: Ranges, val argument: String, val } } - val (first, second) = injector.searchGroup.search_regcomp(pat, whichPat, RE_BOTH) - if (!first) { - VimPlugin.showMessage(message(Msg.e_invcmd)) - VimPlugin.indicateError() - return false - } - val regmatch = second.first as RegExp.regmmatch_T - val sp = second.third as RegExp - - var match: Int - val lcount = editor.lineCount() - val searchcol = 0 - if (globalBusy) { - val offset = editor.currentCaret().offset - val lineStartOffset = editor.getLineStartForOffset(offset.point) - match = 
sp.vim_regexec_multi(regmatch, editor, lcount, editor.currentCaret().getLine().line, searchcol) - if ((!invert && match > 0) || (invert && match <= 0)) { - globalExecuteOne(editor, context, lineStartOffset, cmd.toString()) + if (injector.globalIjOptions().useNewRegex) { + val regex = try { + VimRegex(pat.toString()) + } catch (e: VimRegexException) { + injector.messages.showStatusBarMessage(editor, e.message) + return false + } + + if (globalBusy) { + val match = regex.findInLine(editor, editor.currentCaret().getLine().line) + if (match is VimMatchResult.Success == !invert) { + globalExecuteOne(editor, context, editor.getLineStartOffset(editor.currentCaret().getLine().line), cmd.toString()) + } + } else { + val line1 = range.startLine + val line2 = range.endLine + if (line1 < 0 || line2 < 0) { + return false + } + val matches = regex.findAll( + editor, + editor.getLineStartOffset(line1), + editor.getLineEndOffset(line2), + ) + // A line may contain several matches; reduce them to distinct line numbers so each line is marked only once + val matchedLines = matches.map { match -> + editor.offsetToBufferPosition(match.range.startOffset).line + }.toSet() + // :g targets the lines that contain a match, :v (invert) every line of the range that does not + val targetLines = if (invert) (line1..line2).filterNot { it in matchedLines } else matchedLines + // pass 1: set a mark at the start of each target line; the command is executed on the marks below + val marks = targetLines.map { editor.ij.document.createRangeMarker(editor.getLineStartOffset(it), editor.getLineStartOffset(it)) } + + if (gotInt) { + VimPlugin.showMessage(message("e_interr")) + } else if (marks.isEmpty()) { + if (invert) { + VimPlugin.showMessage(message("global.command.not.found.v", pat.toString())) + } else { + VimPlugin.showMessage(message("global.command.not.found.g", pat.toString())) + } + } else { + globalExe(editor, context, marks, cmd.toString()) + } } } else { - // pass 1: set marks for each (not) matching line - val line1 = range.startLine - val line2 = range.endLine - //region search_regcomp implementation - // We don't need to worry about lastIgnoreSmartCase, it's
always false. Vim resets after checking, and it only sets - // it to true when searching for a word with `*`, `#`, `g*`, etc. - - if (line1 < 0 || line2 < 0) { + val (first, second) = (injector.searchGroup as SearchGroup).search_regcomp(pat, whichPat, RE_BOTH) + if (!first) { + VimPlugin.showMessage(message(Msg.e_invcmd)) + VimPlugin.indicateError() return false } - - var ndone = 0 - val marks = mutableListOf() - for (lnum in line1..line2) { - if (gotInt) break - - // a match on this line? - match = sp.vim_regexec_multi(regmatch, editor, lcount, lnum, searchcol) + val regmatch = second.first as RegExp.regmmatch_T + val sp = second.third as RegExp + + var match: Int + val lcount = editor.lineCount() + val searchcol = 0 + if (globalBusy) { + val offset = editor.currentCaret().offset + val lineStartOffset = editor.getLineStartForOffset(offset.point) + match = sp.vim_regexec_multi(regmatch, editor, lcount, editor.currentCaret().getLine().line, searchcol) if ((!invert && match > 0) || (invert && match <= 0)) { - val lineStartOffset = editor.getLineStartOffset(lnum) - marks += editor.ij.document.createRangeMarker(lineStartOffset, lineStartOffset) - ndone += 1 + globalExecuteOne(editor, context, lineStartOffset, cmd.toString()) + } + } else { + // pass 1: set marks for each (not) matching line + val line1 = range.startLine + val line2 = range.endLine + //region search_regcomp implementation + // We don't need to worry about lastIgnoreSmartCase, it's always false. Vim resets after checking, and it only sets + // it to true when searching for a word with `*`, `#`, `g*`, etc. 
+ + if (line1 < 0 || line2 < 0) { + return false } - // TODO: 25.05.2021 Check break - } - // pass 2: execute the command for each line that has been marked - if (gotInt) { - VimPlugin.showMessage(message("e_interr")) - } else if (ndone == 0) { - if (invert) { - VimPlugin.showMessage(message("global.command.not.found.v", pat.toString())) + var ndone = 0 + val marks = mutableListOf() + for (lnum in line1..line2) { + if (gotInt) break + + // a match on this line? + match = sp.vim_regexec_multi(regmatch, editor, lcount, lnum, searchcol) + if ((!invert && match > 0) || (invert && match <= 0)) { + val lineStartOffset = editor.getLineStartOffset(lnum) + marks += editor.ij.document.createRangeMarker(lineStartOffset, lineStartOffset) + ndone += 1 + } + // TODO: 25.05.2021 Check break + } + + // pass 2: execute the command for each line that has been marked + if (gotInt) { + VimPlugin.showMessage(message("e_interr")) + } else if (ndone == 0) { + if (invert) { + VimPlugin.showMessage(message("global.command.not.found.v", pat.toString())) + } else { + VimPlugin.showMessage(message("global.command.not.found.g", pat.toString())) + } } else { - VimPlugin.showMessage(message("global.command.not.found.g", pat.toString())) + globalExe(editor, context, marks, cmd.toString()) } - } else { - globalExe(editor, context, marks, cmd.toString()) } } return true diff --git a/src/test/java/org/jetbrains/plugins/ideavim/regex/VimRegexEngineTest.kt b/src/test/java/org/jetbrains/plugins/ideavim/regex/VimRegexEngineTest.kt new file mode 100644 index 0000000000..f480be0995 --- /dev/null +++ b/src/test/java/org/jetbrains/plugins/ideavim/regex/VimRegexEngineTest.kt @@ -0,0 +1,157 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. 
+ */ + +package org.jetbrains.plugins.ideavim.regex + +import com.intellij.openapi.editor.VisualPosition +import com.maddyhome.idea.vim.api.injector +import com.maddyhome.idea.vim.command.CommandState +import com.maddyhome.idea.vim.common.TextRange +import com.maddyhome.idea.vim.helper.mode +import com.maddyhome.idea.vim.mark.VimMark +import com.maddyhome.idea.vim.newapi.vim +import com.maddyhome.idea.vim.regexp.VimRegex +import org.jetbrains.plugins.ideavim.VimTestCase +import org.junit.jupiter.api.Test +import kotlin.test.assertEquals + +class VimRegexEngineTest : VimTestCase() { + private fun findAll(pattern: String): List { + val regex = VimRegex(pattern) + return regex.findAll(fixture.editor.vim).map { it.range } + } + + @Test + fun `test end of word at middle of text`() { + configureByText("Lorem Ipsum") + val result = findAll("Lorem\\>") + assertEquals(result, listOf(TextRange(0, 5))) + } + + @Test + fun `test end of word should fail`() { + configureByText("Lorem Ipsum") + val result = findAll("Lo\\>rem") + assertEquals(result, emptyList()) + } + + @Test + fun `test start of word at offset`() { + configureByText("Lorem Ipsum") + val result = findAll("\\") + assertEquals(result, listOf(TextRange(6, 11))) + } + + @Test + fun `test start of word at start of text`() { + configureByText("Lorem Ipsum") + val result = findAll("\\'m\\%#.") + assertEquals(result, listOf(TextRange(6, 7))) + } + + @Test + fun `test text at mark position`() { + configureByText("Lorem Ipsum") + val editor = fixture.editor.vim + val mark = VimMark.create('m', 0, 5, editor.getPath(), editor.extractProtocol())!! + injector.markService.setMark(editor.primaryCaret(), mark) + + val result = findAll("\\%'m...") + assertEquals(result, listOf(TextRange(5, 8))) + } + + @Test + fun `test text before mark position`() { + configureByText("Lorem Ipsum") + val editor = fixture.editor.vim + val mark = VimMark.create('m', 0, 5, editor.getPath(), editor.extractProtocol())!! 
+ injector.markService.setMark(editor.primaryCaret(), mark) + + val result = findAll("\\%<'m...") + assertEquals(result, listOf(TextRange(0, 3), TextRange(3, 6))) + } + + @Test + fun `test text after mark position`() { + configureByText("Lorem Ipsum") + val editor = fixture.editor.vim + val mark = VimMark.create('m', 0, 5, editor.getPath(), editor.extractProtocol())!! + injector.markService.setMark(editor.primaryCaret(), mark) + + val result = findAll("\\%>'m...") + assertEquals(result, listOf(TextRange(6, 9))) + } + + @Test + fun `test text is inside visual area`() { + configureByText("${c}Lorem Ipsum") + typeText("v$") + val result = findAll("\\%VLorem Ipsu\\%Vm") + assertEquals(result, listOf(TextRange(0, 11))) + } + + @Test + fun `test text is not inside visual area`() { + configureByText("${c}Lorem Ipsum") + typeText("vw") + val result = findAll("\\%VLorem Ipsu\\%Vm") + assertEquals(result, emptyList()) + } + + @Test + fun `test cursor and visual belong to the same cursor`() { + configureByText("Lorem Ipsum") + + val caretModel = fixture.editor.caretModel + typeText("v") // a workaround to trigger visual mode + caretModel.addCaret(VisualPosition(0, 2)) + val caret = caretModel.getCaretAt(VisualPosition(0, 2))!! 
+ caret.setSelection(0, 5) + caretModel.addCaret(VisualPosition(0, 0)) + caretModel.addCaret(VisualPosition(0, 1)) + caretModel.addCaret(VisualPosition(0, 3)) + + val result = findAll("\\%V.\\{-}\\%#.") + assertEquals(result, listOf(TextRange(0, 3))) + } + +} \ No newline at end of file diff --git a/vim-engine/build.gradle.kts b/vim-engine/build.gradle.kts index 17a309800a..5489db9ec4 100644 --- a/vim-engine/build.gradle.kts +++ b/vim-engine/build.gradle.kts @@ -11,7 +11,8 @@ plugins { kotlin("jvm") // id("org.jlleitschuh.gradle.ktlint") id("com.google.devtools.ksp") version "1.8.21-1.0.11" - `maven-publish` + `maven-publish` + antlr } val kotlinVersion: String by project @@ -32,7 +33,8 @@ ksp { } afterEvaluate { - tasks.named("kspTestKotlin").configure { enabled = false } + tasks.named("kspKotlin").configure { dependsOn("generateGrammarSource") } + tasks.named("kspTestKotlin").configure { enabled = false } } dependencies { @@ -45,9 +47,14 @@ dependencies { compileOnly("org.jetbrains:annotations:24.1.0") + runtimeOnly("org.antlr:antlr4-runtime:4.10.1") + antlr("org.antlr:antlr4:4.10.1") + ksp(project(":annotation-processors")) implementation(project(":annotation-processors")) compileOnly("org.jetbrains.kotlinx:kotlinx-serialization-json-jvm:$kotlinxSerializationVersion") + + testImplementation("org.mockito.kotlin:mockito-kotlin:5.0.0") } tasks { @@ -55,6 +62,19 @@ tasks { useJUnitPlatform() } + generateGrammarSource { + maxHeapSize = "128m" + arguments.addAll(listOf("-package", "com.maddyhome.idea.vim.regexp.parser.generated", "-visitor")) + outputDirectory = file("src/main/java/com/maddyhome/idea/vim/regexp/parser/generated") + } + + named("compileKotlin") { + dependsOn("generateGrammarSource") + } + named("compileTestKotlin") { + dependsOn("generateTestGrammarSource") + } + compileKotlin { kotlinOptions { apiVersion = "1.5" diff --git a/vim-engine/src/main/antlr/RegexLexer.g4 b/vim-engine/src/main/antlr/RegexLexer.g4 new file mode 100644 index 
0000000000..71acdf61bf --- /dev/null +++ b/vim-engine/src/main/antlr/RegexLexer.g4 @@ -0,0 +1,664 @@ +lexer grammar RegexLexer; + +tokens { + ALTERNATION, AND, LEFT_PAREN, RIGHT_PAREN, LITERAL_CHAR, DOT, STAR, PLUS, OPTIONAL, + RANGE_START, COLLECTION_START, CLASS_IDENTIFIER, CLASS_IDENTIFIER_D, CLASS_KEYWORD, + CLASS_KEYWORD_D, CLASS_FILENAME, CLASS_FILENAME_D, CLASS_PRINTABLE, CLASS_PRINTABLE_D, + CLASS_WS, CLASS_NOT_WS, CLASS_DIGIT, CLASS_NOT_DIGIT, CLASS_HEX, CLASS_NOT_HEX, + CLASS_OCTAL, CLASS_NOT_OCTAL, CLASS_WORD, CLASS_NOT_WORD, CLASS_HEADWORD, CLASS_NOT_HEADWORD, + CLASS_ALPHA, CLASS_NOT_ALPHA, CLASS_LCASE, CLASS_NOT_LCASE, CLASS_UCASE, CLASS_NOT_UCASE, + CLASS_ESC, CLASS_TAB, CLASS_CR, CLASS_BS, CLASS_NL, COLLECTION_LITERAL_CHAR, CURSOR, + LEFT_PAREN_NOCAPTURE, START_MATCH, END_MATCH, DOTNL, RANGE_START_LAZY, BACKREFERENCE, + CLASS_IDENTIFIER_NL, CLASS_IDENTIFIER_D_NL, CLASS_KEYWORD_NL, CLASS_KEYWORD_D_NL, + CLASS_FILENAME_NL, CLASS_FILENAME_D_NL, CLASS_PRINTABLE_NL, CLASS_PRINTABLE_D_NL, + CLASS_WS_NL, CLASS_NOT_WS_NL, CLASS_DIGIT_NL, CLASS_NOT_DIGIT_NL, CLASS_HEX_NL, + CLASS_NOT_HEX_NL, CLASS_OCTAL_NL, CLASS_NOT_OCTAL_NL, CLASS_WORD_NL, CLASS_NOT_WORD_NL, + CLASS_HEADWORD_NL, CLASS_NOT_HEADWORD_NL, CLASS_ALPHA_NL, CLASS_NOT_ALPHA_NL, CLASS_LCASE_NL, + CLASS_NOT_LCASE_NL, CLASS_UCASE_NL, CLASS_NOT_UCASE_NL, START_OF_FILE, END_OF_FILE, + CARET, DOLLAR, START_OF_LINE, END_OF_LINE, ATOMIC, START_OF_WORD, END_OF_WORD, + POSITIVE_LOOKAHEAD, NEGATIVE_LOOKAHEAD, POSITIVE_LOOKBEHIND, NEGATIVE_LOOKBEHIND, + POSITIVE_LIMITED_LOOKBEHIND, NEGATIVE_LIMITED_LOOKBEHIND, LAST_SUBSTITUTE, VISUAL, + DECIMAL_CODE, OCTAL_CODE, HEXADECIMAL_CODE, UNICODE_CODE, WIDE_UNICODE_CODE, + LINE, BEFORE_LINE, AFTER_LINE, COLUMN, BEFORE_COLUMN, AFTER_COLUMN, LINE_CURSOR, + BEFORE_LINE_CURSOR, AFTER_LINE_CURSOR, COLUMN_CURSOR, BEFORE_COLUMN_CURSOR, + AFTER_COLUMN_CURSOR, OPTIONALLY_MATCHED_START, OPTIONALLY_MATCHED_END, MARK, + BEFORE_MARK, AFTER_MARK +} + +@members { + public Boolean 
ignoreCase = null; + + void setIgnoreCase() { ignoreCase = true; } + void setNoIgnoreCase() { if (ignoreCase == null) ignoreCase = false; } +} + +// ------------------------------------------------------------------------------------------------ // +// // +// // +// DEFAULT_MODE - This is the default lexer mode, and can be set after seeing a \m token // +// // +// // +// ------------------------------------------------------------------------------------------------ // +ALTERNATION_MAGIC: '\\|' -> type(ALTERNATION); +AND_MAGIC: '\\&' -> type(AND); +LEFT_PAREN_MAGIC: '\\(' -> type(LEFT_PAREN); +LEFT_PAREN_NOCAPTURE_MAGIC: '\\%(' -> type(LEFT_PAREN_NOCAPTURE); +RIGHT_PAREN_MAGIC: '\\)' -> type(RIGHT_PAREN); +DOT_MAGIC: '.' -> type(DOT); +DOTNL_MAGIC: '\\_.' -> type(DOTNL); +BACKREFERENCE_MAGIC: '\\' [0-9] -> type(BACKREFERENCE); +LAST_SUBSTITUTE_MAGIC: '~' -> type(LAST_SUBSTITUTE); +DECIMAL_CODE_MAGIC: '\\%d' [0-9]+ -> type(DECIMAL_CODE); +OCTAL_CODE_MAGIC: '\\%o' [0-7] [0-7]? [0-7]? -> type(OCTAL_CODE); +HEXADECIMAL_CODE_MAGIC: '\\%x' [a-fA-F0-9] [a-fA-F0-9]? -> type(HEXADECIMAL_CODE); +UNICODE_CODE_MAGIC: '\\%u' [a-fA-F0-9] [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? -> type(UNICODE_CODE); +WIDE_UNICODE_CODE_MAGIC: '\\%U' [a-fA-F0-9] [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? -> type(WIDE_UNICODE_CODE); + +// multi +STAR_MAGIC: '*' -> type(STAR); +PLUS_MAGIC: '\\+' -> type(PLUS); +OPTIONAL_MAGIC: ('\\=' | '\\?') -> type(OPTIONAL); +ATOMIC_MAGIC: '\\@>' -> type(ATOMIC); +POSITIVE_LOOKAHEAD_MAGIC: '\\@=' -> type(POSITIVE_LOOKAHEAD); +NEGATIVE_LOOKAHEAD_MAGIC: '\\@!' 
-> type(NEGATIVE_LOOKAHEAD); +POSITIVE_LOOKBEHIND_MAGIC: '\\@<=' -> type(POSITIVE_LOOKBEHIND); +NEGATIVE_LOOKBEHIND_MAGIC: '\\@ type(NEGATIVE_LOOKBEHIND); +POSITIVE_LIMITED_LOOKBEHIND_MAGIC: '\\@' [0-9]+ '<=' -> type(POSITIVE_LIMITED_LOOKBEHIND); +NEGATIVE_LIMITED_LOOKBEHIND_MAGIC: '\\@' [0-9]+ ' type(NEGATIVE_LIMITED_LOOKBEHIND); +RANGE_START_MAGIC: '\\{' -> pushMode(INSIDE_RANGE), type(RANGE_START); +RANGE_START_LAZY_MAGIC: '\\{-' -> pushMode(INSIDE_RANGE), type(RANGE_START_LAZY); + +COLLECTION_START_MAGIC: '[' -> pushMode(INSIDE_COLLECTION), type(COLLECTION_START); +COLLECTION_START_EOL_MAGIC: '\\_[' -> pushMode(INSIDE_COLLECTION), type(COLLECTION_START); +OPTIONALLY_MATCHED_START_MAGIC: '\\%[' -> type(OPTIONALLY_MATCHED_START); +OPTIONALLY_MATCHED_END_MAGIC: ']' -> type(OPTIONALLY_MATCHED_END); + +// zero-width tokens +CURSOR_MAGIC: '\\%#' -> type(CURSOR); +START_MATCH_MAGIC: '\\zs' -> type(START_MATCH); +END_MATCH_MAGIC: '\\ze' -> type(END_MATCH); +START_OF_FILE_MAGIC: '\\%^' -> type(START_OF_FILE); +END_OF_FILE_MAGIC: '\\%$' -> type(END_OF_FILE); +START_OF_LINE_MAGIC: '\\_^' -> type(START_OF_LINE); +END_OF_LINE_MAGIC: '\\_$' -> type(END_OF_LINE); +CARET_MAGIC: '^' -> type(CARET); +DOLLAR_MAGIC: '$' -> type(DOLLAR); +START_OF_WORD_MAGIC: '\\<' -> type(START_OF_WORD); +END_OF_WORD_MAGIC: '\\>' -> type(END_OF_WORD); +VISUAL_MAGIC: '\\%V' -> type(VISUAL); +LINE_MAGIC: '\\%' [0-9]+ 'l' -> type(LINE); +BEFORE_LINE_MAGIC: '\\%<' [0-9]+ 'l' -> type(BEFORE_LINE); +AFTER_LINE_MAGIC: '\\%>' [0-9]+ 'l' -> type(AFTER_LINE); +LINE_CURSOR_MAGIC: '\\%.l' -> type(LINE_CURSOR); +BEFORE_LINE_CURSOR_MAGIC: '\\%<.l' -> type(BEFORE_LINE_CURSOR); +AFTER_LINE_CURSOR_MAGIC: '\\%>.l' -> type(AFTER_LINE_CURSOR); +COLUMN_MAGIC: '\\%' [0-9]+ [cv] -> type(COLUMN); +BEFORE_COLUMN_MAGIC: '\\%<' [0-9]+ [cv] -> type(BEFORE_COLUMN); +AFTER_COLUMN_MAGIC: '\\%>' [0-9]+ [cv] -> type(AFTER_COLUMN); +COLUMN_CURSOR_MAGIC: '\\%.' [cv] -> type(COLUMN_CURSOR); +BEFORE_COLUMN_CURSOR_MAGIC: '\\%<.' 
[cv] -> type(BEFORE_COLUMN_CURSOR); +AFTER_COLUMN_CURSOR_MAGIC: '\\%>.' [cv] -> type(AFTER_COLUMN_CURSOR); +MARK_MAGIC: '\\%\'' ([a-zA-Z0-9<>'`"^.(){}] | '[' | ']') -> type(MARK); +BEFORE_MARK_MAGIC: '\\%<\'' ([a-zA-Z0-9<>'`"^.(){}] | '[' | ']') -> type(BEFORE_MARK); +AFTER_MARK_MAGIC: '\\%>\'' ([a-zA-Z0-9<>'`"^.(){}] | '[' | ']') -> type(AFTER_MARK); + +// case-related tokens +IGNORE_CASE_MAGIC: '\\c' { setIgnoreCase(); } -> skip; +NO_IGNORE_CASE_MAGIC: '\\C' { setNoIgnoreCase(); } -> skip; + +// character classes +CLASS_IDENTIFIER_MAGIC: '\\i' -> type(CLASS_IDENTIFIER); +CLASS_IDENTIFIER_D_MAGIC: '\\I' -> type(CLASS_IDENTIFIER_D); +CLASS_KEYWORD_MAGIC: '\\k' -> type(CLASS_KEYWORD); +CLASS_KEYWORD_D_MAGIC: '\\K' -> type(CLASS_KEYWORD_D); +CLASS_FILENAME_MAGIC: '\\f' -> type(CLASS_FILENAME); +CLASS_FILENAME_D_MAGIC: '\\F' -> type(CLASS_FILENAME_D); +CLASS_PRINTABLE_MAGIC: '\\p' -> type(CLASS_PRINTABLE); +CLASS_PRINTABLE_D_MAGIC: '\\P' -> type(CLASS_PRINTABLE_D); +CLASS_WS_MAGIC: '\\s' -> type(CLASS_WS); +CLASS_NOT_WS_MAGIC: '\\S' -> type(CLASS_NOT_WS); +CLASS_DIGIT_MAGIC: '\\d' -> type(CLASS_DIGIT); +CLASS_NOT_DIGIT_MAGIC: '\\D' -> type(CLASS_NOT_DIGIT); +CLASS_HEX_MAGIC: '\\x' -> type(CLASS_HEX); +CLASS_NOT_HEX_MAGIC: '\\X' -> type(CLASS_NOT_HEX); +CLASS_OCTAL_MAGIC: '\\o' -> type(CLASS_OCTAL); +CLASS_NOT_OCTAL_MAGIC: '\\O' -> type(CLASS_NOT_OCTAL); +CLASS_WORD_MAGIC: '\\w' -> type(CLASS_WORD); +CLASS_NOT_WORD_MAGIC: '\\W' -> type(CLASS_NOT_WORD); +CLASS_HEADWORD_MAGIC: '\\h' -> type(CLASS_HEADWORD); +CLASS_NOT_HEADWORD_MAGIC: '\\H' -> type(CLASS_NOT_HEADWORD); +CLASS_ALPHA_MAGIC: '\\a' -> type(CLASS_ALPHA); +CLASS_NOT_ALPHA_MAGIC: '\\A' -> type(CLASS_NOT_ALPHA); +CLASS_LCASE_MAGIC: '\\l' -> type(CLASS_LCASE); +CLASS_NOT_LCASE_MAGIC: '\\L' -> type(CLASS_NOT_LCASE); +CLASS_UCASE_MAGIC: '\\u' -> type(CLASS_UCASE); +CLASS_NOT_UCASE_MAGIC: '\\U' -> type(CLASS_NOT_UCASE); + +CLASS_IDENTIFIER_NL_MAGIC: '\\_i' -> type(CLASS_IDENTIFIER_NL); +CLASS_IDENTIFIER_D_NL_MAGIC: 
'\\_I' -> type(CLASS_IDENTIFIER_D_NL); +CLASS_KEYWORD_NL_MAGIC: '\\_k' -> type(CLASS_KEYWORD_NL); +CLASS_KEYWORD_D_NL_MAGIC: '\\_K' -> type(CLASS_KEYWORD_D_NL); +CLASS_FILENAME_NL_MAGIC: '\\_f' -> type(CLASS_FILENAME_NL); +CLASS_FILENAME_D_NL_MAGIC: '\\_F' -> type(CLASS_FILENAME_D_NL); +CLASS_PRINTABLE_NL_MAGIC: '\\_p' -> type(CLASS_PRINTABLE_NL); +CLASS_PRINTABLE_D_NL_MAGIC: '\\_P' -> type(CLASS_PRINTABLE_D_NL); +CLASS_WS_NL_MAGIC: '\\_s' -> type(CLASS_WS_NL); +CLASS_NOT_WS_NL_MAGIC: '\\_S' -> type(CLASS_NOT_WS_NL); +CLASS_DIGIT_NL_MAGIC: '\\_d' -> type(CLASS_DIGIT_NL); +CLASS_NOT_DIGIT_NL_MAGIC: '\\_D' -> type(CLASS_NOT_DIGIT_NL); +CLASS_HEX_NL_MAGIC: '\\_x' -> type(CLASS_HEX_NL); +CLASS_NOT_HEX_NL_MAGIC: '\\_X' -> type(CLASS_NOT_HEX_NL); +CLASS_OCTAL_NL_MAGIC: '\\_o' -> type(CLASS_OCTAL_NL); +CLASS_NOT_OCTAL_NL_MAGIC: '\\_O' -> type(CLASS_NOT_OCTAL_NL); +CLASS_WORD_NL_MAGIC: '\\_w' -> type(CLASS_WORD_NL); +CLASS_NOT_WORD_NL_MAGIC: '\\_W' -> type(CLASS_NOT_WORD_NL); +CLASS_HEADWORD_NL_MAGIC: '\\_h' -> type(CLASS_HEADWORD_NL); +CLASS_NOT_HEADWORD_NL_MAGIC: '\\_H' -> type(CLASS_NOT_HEADWORD_NL); +CLASS_ALPHA_NL_MAGIC: '\\_a' -> type(CLASS_ALPHA_NL); +CLASS_NOT_ALPHA_NL_MAGIC: '\\_A' -> type(CLASS_NOT_ALPHA_NL); +CLASS_LCASE_NL_MAGIC: '\\_l' -> type(CLASS_LCASE_NL); +CLASS_NOT_LCASE_NL_MAGIC: '\\_L' -> type(CLASS_NOT_LCASE_NL); +CLASS_UCASE_NL_MAGIC: '\\_u' -> type(CLASS_UCASE_NL); +CLASS_NOT_UCASE_NL_MAGIC: '\\_U' -> type(CLASS_NOT_UCASE_NL); + +CLASS_ESC_MAGIC: '\\e' -> type(CLASS_ESC); +CLASS_TAB_MAGIC: '\\t' -> type(CLASS_TAB); +CLASS_CR_MAGIC: '\\r' -> type(CLASS_CR); +CLASS_BS_MAGIC: '\\b' -> type(CLASS_BS); +CLASS_NL_MAGIC: '\\n' -> type(CLASS_NL); + +// tokens related to changing lexer mode. 
These are only used by the lexer and not sent to the parser +SETMAGIC_MAGIC: '\\m' -> skip; // already in magic mode +SETNOMAGIC_MAGIC: '\\M' -> mode(NO_MAGIC), skip; +SETVMAGIC_MAGIC: '\\v' -> mode(V_MAGIC), skip; +SETVNOMAGIC_MAGIC: '\\V' -> mode(V_NO_MAGIC), skip; + +// everything else, either escaped or not, should be taken literally +LITERAL_CHAR_MAGIC: '\\'? . -> type(LITERAL_CHAR); + +// ------------------------------------------------------------------------------------------------ // +// // +// // +// NO_MAGIC - This mode is set when the lexer comes across an \M token // +// // +// // +// ------------------------------------------------------------------------------------------------ // +mode NO_MAGIC; +ALTERNATION_NOMAGIC: '\\|' -> type(ALTERNATION); +AND_NOMAGIC: '\\&' -> type(AND); +LEFT_PAREN_NOMAGIC: '\\(' -> type(LEFT_PAREN); +LEFT_PAREN_NOCAPTURE_NOMAGIC: '\\%(' -> type(LEFT_PAREN_NOCAPTURE); +RIGHT_PAREN_NOMAGIC: '\\)' -> type(RIGHT_PAREN); +DOT_NOMAGIC: '\\.' -> type(DOT); +DOTNL_NOMAGIC: '\\_.' -> type(DOTNL); +BACKREFERENCE_NOMAGIC: '\\' [0-9] -> type(BACKREFERENCE); +LAST_SUBSTITUTE_NOMAGIC: '\\~' -> type(LAST_SUBSTITUTE); +DECIMAL_CODE_NOMAGIC: '\\%d' [0-9]+ -> type(DECIMAL_CODE); +OCTAL_CODE_NOMAGIC: '\\%o' [0-7] [0-7]? [0-7]? -> type(OCTAL_CODE); +HEXADECIMAL_CODE_NOMAGIC: '\\%x' [a-fA-F0-9] [a-fA-F0-9]? -> type(HEXADECIMAL_CODE); +UNICODE_CODE_NOMAGIC: '\\%u' [a-fA-F0-9] [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? -> type(UNICODE_CODE); +WIDE_UNICODE_CODE_NOMAGIC: '\\%U' [a-fA-F0-9] [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? -> type(WIDE_UNICODE_CODE); + +// multi +STAR_NOMAGIC: '\\*' -> type(STAR); +PLUS_NOMAGIC: '\\+' -> type(PLUS); +OPTIONAL_NOMAGIC: ('\\=' | '\\?') -> type(OPTIONAL); +ATOMIC_NOMAGIC: '\\@>' -> type(ATOMIC); +POSITIVE_LOOKAHEAD_NOMAGIC: '\\@=' -> type(POSITIVE_LOOKAHEAD); +NEGATIVE_LOOKAHEAD_NOMAGIC: '\\@!' 
-> type(NEGATIVE_LOOKAHEAD); +POSITIVE_LOOKBEHIND_NOMAGIC: '\\@<=' -> type(POSITIVE_LOOKBEHIND); +NEGATIVE_LOOKBEHIND_NOMAGIC: '\\@ type(NEGATIVE_LOOKBEHIND); +POSITIVE_LIMITED_LOOKBEHIND_NOMAGIC: '\\@' [0-9]+ '<=' -> type(POSITIVE_LIMITED_LOOKBEHIND); +NEGATIVE_LIMITED_LOOKBEHIND_NOMAGIC: '\\@' [0-9]+ ' type(NEGATIVE_LIMITED_LOOKBEHIND); +RANGE_START_NOMAGIC: '\\{' -> pushMode(INSIDE_RANGE), type(RANGE_START); +RANGE_START_LAZY_NOMAGIC: '\\{-' -> pushMode(INSIDE_RANGE), type(RANGE_START_LAZY); + +COLLECTION_START_NOMAGIC: '\\[' -> pushMode(INSIDE_COLLECTION), type(COLLECTION_START); +COLLECTION_START_EOL_NOMAGIC: '\\_[' -> pushMode(INSIDE_COLLECTION), type(COLLECTION_START); +OPTIONALLY_MATCHED_START_NOMAGIC: '\\%[' -> type(OPTIONALLY_MATCHED_START); +OPTIONALLY_MATCHED_END_NOMAGIC: ']' -> type(OPTIONALLY_MATCHED_END); + +// zero-width tokens +CURSOR_NOMAGIC: '\\%#' -> type(CURSOR); +START_MATCH_NOMAGIC: '\\zs' -> type(START_MATCH); +END_MATCH_NOMAGIC: '\\ze' -> type(END_MATCH); +START_OF_FILE_NOMAGIC: '\\%^' -> type(START_OF_FILE); +END_OF_FILE_NOMAGIC: '\\%$' -> type(END_OF_FILE); +START_OF_LINE_NOMAGIC: '\\_^' -> type(START_OF_LINE); +END_OF_LINE_NOMAGIC: '\\_$' -> type(END_OF_LINE); +CARET_NOMAGIC: '^' -> type(CARET); +DOLLAR_NOMAGIC: '$' -> type(DOLLAR); +START_OF_WORD_NOMAGIC: '\\<' -> type(START_OF_WORD); +END_OF_WORD_NOMAGIC: '\\>' -> type(END_OF_WORD); +VISUAL_NOMAGIC: '\\%V' -> type(VISUAL); +LINE_NOMAGIC: '\\%' [0-9]+ 'l' -> type(LINE); +BEFORE_LINE_NOMAGIC: '\\%<' [0-9]+ 'l' -> type(BEFORE_LINE); +AFTER_LINE_NOMAGIC: '\\%>' [0-9]+ 'l' -> type(AFTER_LINE); +LINE_CURSOR_NOMAGIC: '\\%.l' -> type(LINE_CURSOR); +BEFORE_LINE_CURSOR_NOMAGIC: '\\%<.l' -> type(BEFORE_LINE_CURSOR); +AFTER_LINE_CURSOR_NOMAGIC: '\\%>.l' -> type(AFTER_LINE_CURSOR); +COLUMN_NOMAGIC: '\\%' [0-9]+ [cv] -> type(COLUMN); +BEFORE_COLUMN_NOMAGIC: '\\%<' [0-9]+ [cv] -> type(BEFORE_COLUMN); +AFTER_COLUMN_NOMAGIC: '\\%>' [0-9]+ [cv] -> type(AFTER_COLUMN); +COLUMN_CURSOR_NOMAGIC: '\\%.' 
[cv] -> type(COLUMN_CURSOR); +BEFORE_COLUMN_CURSOR_NOMAGIC: '\\%<.' [cv] -> type(BEFORE_COLUMN_CURSOR); +AFTER_COLUMN_CURSOR_NOMAGIC: '\\%>.' [cv] -> type(AFTER_COLUMN_CURSOR); +MARK_NOMAGIC: '\\%\'' ([a-zA-Z0-9<>'`"^.(){}] | '[' | ']') -> type(MARK); +BEFORE_MARK_NOMAGIC: '\\%<\'' ([a-zA-Z0-9<>'`"^.(){}] | '[' | ']') -> type(BEFORE_MARK); +AFTER_MARK_NOMAGIC: '\\%>\'' ([a-zA-Z0-9<>'`"^.(){}] | '[' | ']') -> type(AFTER_MARK); + +// case-related tokens +IGNORE_CASE_NOMAGIC: '\\c' { setIgnoreCase(); } -> skip; +NO_IGNORE_CASE_NOMAGIC: '\\C' { setNoIgnoreCase(); } -> skip; + +// character classes +CLASS_IDENTIFIER_NOMAGIC: '\\i' -> type(CLASS_IDENTIFIER); +CLASS_IDENTIFIER_D_NOMAGIC: '\\I' -> type(CLASS_IDENTIFIER_D); +CLASS_KEYWORD_NOMAGIC: '\\k' -> type(CLASS_KEYWORD); +CLASS_KEYWORD_D_NOMAGIC: '\\K' -> type(CLASS_KEYWORD_D); +CLASS_FILENAME_NOMAGIC: '\\f' -> type(CLASS_FILENAME); +CLASS_FILENAME_D_NOMAGIC: '\\F' -> type(CLASS_FILENAME_D); +CLASS_PRINTABLE_NOMAGIC: '\\p' -> type(CLASS_PRINTABLE); +CLASS_PRINTABLE_D_NOMAGIC: '\\P' -> type(CLASS_PRINTABLE_D); +CLASS_WS_NOMAGIC: '\\s' -> type(CLASS_WS); +CLASS_NOT_WS_NOMAGIC: '\\S' -> type(CLASS_NOT_WS); +CLASS_DIGIT_NOMAGIC: '\\d' -> type(CLASS_DIGIT); +CLASS_NOT_DIGIT_NOMAGIC: '\\D' -> type(CLASS_NOT_DIGIT); +CLASS_HEX_NOMAGIC: '\\x' -> type(CLASS_HEX); +CLASS_NOT_HEX_NOMAGIC: '\\X' -> type(CLASS_NOT_HEX); +CLASS_OCTAL_NOMAGIC: '\\o' -> type(CLASS_OCTAL); +CLASS_NOT_OCTAL_NOMAGIC: '\\O' -> type(CLASS_NOT_OCTAL); +CLASS_WORD_NOMAGIC: '\\w' -> type(CLASS_WORD); +CLASS_NOT_WORD_NOMAGIC: '\\W' -> type(CLASS_NOT_WORD); +CLASS_HEADWORD_NOMAGIC: '\\h' -> type(CLASS_HEADWORD); +CLASS_NOT_HEADWORD_NOMAGIC: '\\H' -> type(CLASS_NOT_HEADWORD); +CLASS_ALPHA_NOMAGIC: '\\a' -> type(CLASS_ALPHA); +CLASS_NOT_ALPHA_NOMAGIC: '\\A' -> type(CLASS_NOT_ALPHA); +CLASS_LCASE_NOMAGIC: '\\l' -> type(CLASS_LCASE); +CLASS_NOT_LCASE_NOMAGIC: '\\L' -> type(CLASS_NOT_LCASE); +CLASS_UCASE_NOMAGIC: '\\u' -> type(CLASS_UCASE); 
+CLASS_NOT_UCASE_NOMAGIC: '\\U' -> type(CLASS_NOT_UCASE); + +CLASS_IDENTIFIER_NL_NOMAGIC: '\\_i' -> type(CLASS_IDENTIFIER_NL); +CLASS_IDENTIFIER_D_NL_NOMAGIC: '\\_I' -> type(CLASS_IDENTIFIER_D_NL); +CLASS_KEYWORD_NL_NOMAGIC: '\\_k' -> type(CLASS_KEYWORD_NL); +CLASS_KEYWORD_D_NL_NOMAGIC: '\\_K' -> type(CLASS_KEYWORD_D_NL); +CLASS_FILENAME_NL_NOMAGIC: '\\_f' -> type(CLASS_FILENAME_NL); +CLASS_FILENAME_D_NL_NOMAGIC: '\\_F' -> type(CLASS_FILENAME_D_NL); +CLASS_PRINTABLE_NL_NOMAGIC: '\\_p' -> type(CLASS_PRINTABLE_NL); +CLASS_PRINTABLE_D_NL_NOMAGIC: '\\_P' -> type(CLASS_PRINTABLE_D_NL); +CLASS_WS_NL_NOMAGIC: '\\_s' -> type(CLASS_WS_NL); +CLASS_NOT_WS_NL_NOMAGIC: '\\_S' -> type(CLASS_NOT_WS_NL); +CLASS_DIGIT_NL_NOMAGIC: '\\_d' -> type(CLASS_DIGIT_NL); +CLASS_NOT_DIGIT_NL_NOMAGIC: '\\_D' -> type(CLASS_NOT_DIGIT_NL); +CLASS_HEX_NL_NOMAGIC: '\\_x' -> type(CLASS_HEX_NL); +CLASS_NOT_HEX_NL_NOMAGIC: '\\_X' -> type(CLASS_NOT_HEX_NL); +CLASS_OCTAL_NL_NOMAGIC: '\\_o' -> type(CLASS_OCTAL_NL); +CLASS_NOT_OCTAL_NL_NOMAGIC: '\\_O' -> type(CLASS_NOT_OCTAL_NL); +CLASS_WORD_NL_NOMAGIC: '\\_w' -> type(CLASS_WORD_NL); +CLASS_NOT_WORD_NL_NOMAGIC: '\\_W' -> type(CLASS_NOT_WORD_NL); +CLASS_HEADWORD_NL_NOMAGIC: '\\_h' -> type(CLASS_HEADWORD_NL); +CLASS_NOT_HEADWORD_NL_NOMAGIC: '\\_H' -> type(CLASS_NOT_HEADWORD_NL); +CLASS_ALPHA_NL_NOMAGIC: '\\_a' -> type(CLASS_ALPHA_NL); +CLASS_NOT_ALPHA_NL_NOMAGIC: '\\_A' -> type(CLASS_NOT_ALPHA_NL); +CLASS_LCASE_NL_NOMAGIC: '\\_l' -> type(CLASS_LCASE_NL); +CLASS_NOT_LCASE_NL_NOMAGIC: '\\_L' -> type(CLASS_NOT_LCASE_NL); +CLASS_UCASE_NL_NOMAGIC: '\\_u' -> type(CLASS_UCASE_NL); +CLASS_NOT_UCASE_NL_NOMAGIC: '\\_U' -> type(CLASS_NOT_UCASE_NL); + +CLASS_ESC_NOMAGIC: '\\e' -> type(CLASS_ESC); +CLASS_TAB_NOMAGIC: '\\t' -> type(CLASS_TAB); +CLASS_CR_NOMAGIC: '\\r' -> type(CLASS_CR); +CLASS_BS_NOMAGIC: '\\b' -> type(CLASS_BS); +CLASS_NL_NOMAGIC: '\\n' -> type(CLASS_NL); + +// tokens related to changing lexer mode. 
These are only used by the lexer and not sent to the parser +SETMAGIC_NOMAGIC: '\\m' -> mode(DEFAULT_MODE), skip; +SETNOMAGIC_NOMAGIC: '\\M' -> skip; // already in nomagic mode +SETVMAGIC_NOMAGIC: '\\v' -> mode(V_MAGIC), skip; +SETVNOMAGIC_NOMAGIC: '\\V' -> mode(V_NO_MAGIC), skip; + +// everything else, either escaped or not, should be taken literally +LITERAL_CHAR_NOMAGIC: '\\'? . -> type(LITERAL_CHAR); + + +// ------------------------------------------------------------------------------------------------ // +// // +// // +// V_MAGIC - This mode is set when the lexer comes across an \v token // +// // +// // +// ------------------------------------------------------------------------------------------------ // +mode V_MAGIC; +ALTERNATION_VMAGIC: '|' -> type(ALTERNATION); +AND_VMAGIC: '&' -> type(AND); +LEFT_PAREN_VMAGIC: '(' -> type(LEFT_PAREN); +LEFT_PAREN_NOCAPTURE_VMAGIC: '%(' -> type(LEFT_PAREN_NOCAPTURE); +RIGHT_PAREN_VMAGIC: ')' -> type(RIGHT_PAREN); +DOT_VMAGIC: '.' -> type(DOT); +DOTNL_VMAGIC: '\\_.' -> type(DOTNL); +BACKREFERENCE_VMAGIC: '\\' [0-9] -> type(BACKREFERENCE); +LAST_SUBSTITUTE_VMAGIC: '~' -> type(LAST_SUBSTITUTE); +DECIMAL_CODE_VMAGIC: '%d' [0-9]+ -> type(DECIMAL_CODE); +OCTAL_CODE_VMAGIC: '%o' [0-7] [0-7]? [0-7]? -> type(OCTAL_CODE); +HEXADECIMAL_CODE_VMAGIC: '%x' [a-fA-F0-9] [a-fA-F0-9]? -> type(HEXADECIMAL_CODE); +UNICODE_CODE_VMAGIC: '%u' [a-fA-F0-9] [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? -> type(UNICODE_CODE); +WIDE_UNICODE_CODE_VMAGIC: '%U' [a-fA-F0-9] [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? -> type(WIDE_UNICODE_CODE); + +// multi +STAR_VMAGIC: '*' -> type(STAR); +PLUS_VMAGIC: '+' -> type(PLUS); +OPTIONAL_VMAGIC: ('=' | '?') -> type(OPTIONAL); +ATOMIC_VMAGIC: '@>' -> type(ATOMIC); +POSITIVE_LOOKAHEAD_VMAGIC: '@=' -> type(POSITIVE_LOOKAHEAD); +NEGATIVE_LOOKAHEAD_VMAGIC: '@!' 
-> type(NEGATIVE_LOOKAHEAD); +POSITIVE_LOOKBEHIND_VMAGIC: '@<=' -> type(POSITIVE_LOOKBEHIND); +NEGATIVE_LOOKBEHIND_VMAGIC: '@ type(NEGATIVE_LOOKBEHIND); +POSITIVE_LIMITED_LOOKBEHIND_VMAGIC: '@' [0-9]+ '<=' -> type(POSITIVE_LIMITED_LOOKBEHIND); +NEGATIVE_LIMITED_LOOKBEHIND_VMAGIC: '@' [0-9]+ ' type(NEGATIVE_LIMITED_LOOKBEHIND); +RANGE_START_VMAGIC: '{' -> pushMode(INSIDE_RANGE), type(RANGE_START); +RANGE_START_LAZY_VMAGIC: '{-' -> pushMode(INSIDE_RANGE), type(RANGE_START_LAZY); + +COLLECTION_START_VMAGIC: '[' -> pushMode(INSIDE_COLLECTION), type(COLLECTION_START); +COLLECTION_START_EOL_VMAGIC: '\\_[' -> pushMode(INSIDE_COLLECTION), type(COLLECTION_START); +OPTIONALLY_MATCHED_START_VMAGIC: '%[' -> type(OPTIONALLY_MATCHED_START); +OPTIONALLY_MATCHED_END_VMAGIC: ']' -> type(OPTIONALLY_MATCHED_END); + +// zero-width tokens +CURSOR_VMAGIC: '%#' -> type(CURSOR); +START_MATCH_VMAGIC: '\\zs' -> type(START_MATCH); +END_MATCH_VMAGIC: '\\ze' -> type(END_MATCH); +START_OF_FILE_VMAGIC: '%^' -> type(START_OF_FILE); +END_OF_FILE_VMAGIC: '%$' -> type(END_OF_FILE); +START_OF_LINE_VMAGIC: '\\_^' -> type(START_OF_LINE); +END_OF_LINE_VMAGIC: '\\_$' -> type(END_OF_LINE); +CARET_VMAGIC: '^' -> type(CARET); +DOLLAR_VMAGIC: '$' -> type(DOLLAR); +START_OF_WORD_VMAGIC: '<' -> type(START_OF_WORD); +END_OF_WORD_VMAGIC: '>' -> type(END_OF_WORD); +VISUAL_VMAGIC: '%V' -> type(VISUAL); +LINE_VMAGIC: '%' [0-9]+ 'l' -> type(LINE); +BEFORE_LINE_VMAGIC: '%<' [0-9]+ 'l' -> type(BEFORE_LINE); +AFTER_LINE_VMAGIC: '%>' [0-9]+ 'l' -> type(AFTER_LINE); +LINE_CURSOR_VMAGIC: '%.l' -> type(LINE_CURSOR); +BEFORE_LINE_CURSOR_VMAGIC: '%<.l' -> type(BEFORE_LINE_CURSOR); +AFTER_LINE_CURSOR_VMAGIC: '%>.l' -> type(AFTER_LINE_CURSOR); +COLUMN_VMAGIC: '%' [0-9]+ [cv] -> type(COLUMN); +BEFORE_COLUMN_VMAGIC: '%<' [0-9]+ [cv] -> type(BEFORE_COLUMN); +AFTER_COLUMN_VMAGIC: '%>' [0-9]+ [cv] -> type(AFTER_COLUMN); +COLUMN_CURSOR_VMAGIC: '%.' [cv] -> type(COLUMN_CURSOR); +BEFORE_COLUMN_CURSOR_VMAGIC: '%<.' 
[cv] -> type(BEFORE_COLUMN_CURSOR); +AFTER_COLUMN_CURSOR_VMAGIC: '%>.' [cv] -> type(AFTER_COLUMN_CURSOR); +MARK_VMAGIC: '%\'' ([a-zA-Z0-9<>'`"^.(){}] | '[' | ']') -> type(MARK); +BEFORE_MARK_VMAGIC: '%<\'' ([a-zA-Z0-9<>'`"^.(){}] | '[' | ']') -> type(BEFORE_MARK); +AFTER_MARK_VMAGIC: '%>\'' ([a-zA-Z0-9<>'`"^.(){}] | '[' | ']') -> type(AFTER_MARK); + +// case-related tokens +IGNORE_CASE_VMAGIC: '\\c' { setIgnoreCase(); } -> skip; +NO_IGNORE_CASE_VMAGIC: '\\C' { setNoIgnoreCase(); } -> skip; + +// character classes +CLASS_IDENTIFIER_VMAGIC: '\\i' -> type(CLASS_IDENTIFIER); +CLASS_IDENTIFIER_D_VMAGIC: '\\I' -> type(CLASS_IDENTIFIER_D); +CLASS_KEYWORD_VMAGIC: '\\k' -> type(CLASS_KEYWORD); +CLASS_KEYWORD_D_VMAGIC: '\\K' -> type(CLASS_KEYWORD_D); +CLASS_FILENAME_VMAGIC: '\\f' -> type(CLASS_FILENAME); +CLASS_FILENAME_D_VMAGIC: '\\F' -> type(CLASS_FILENAME_D); +CLASS_PRINTABLE_VMAGIC: '\\p' -> type(CLASS_PRINTABLE); +CLASS_PRINTABLE_D_VMAGIC: '\\P' -> type(CLASS_PRINTABLE_D); +CLASS_WS_VMAGIC: '\\s' -> type(CLASS_WS); +CLASS_NOT_WS_VMAGIC: '\\S' -> type(CLASS_NOT_WS); +CLASS_DIGIT_VMAGIC: '\\d' -> type(CLASS_DIGIT); +CLASS_NOT_DIGIT_VMAGIC: '\\D' -> type(CLASS_NOT_DIGIT); +CLASS_HEX_VMAGIC: '\\x' -> type(CLASS_HEX); +CLASS_NOT_HEX_VMAGIC: '\\X' -> type(CLASS_NOT_HEX); +CLASS_OCTAL_VMAGIC: '\\o' -> type(CLASS_OCTAL); +CLASS_NOT_OCTAL_VMAGIC: '\\O' -> type(CLASS_NOT_OCTAL); +CLASS_WORD_VMAGIC: '\\w' -> type(CLASS_WORD); +CLASS_NOT_WORD_VMAGIC: '\\W' -> type(CLASS_NOT_WORD); +CLASS_HEADWORD_VMAGIC: '\\h' -> type(CLASS_HEADWORD); +CLASS_NOT_HEADWORD_VMAGIC: '\\H' -> type(CLASS_NOT_HEADWORD); +CLASS_ALPHA_VMAGIC: '\\a' -> type(CLASS_ALPHA); +CLASS_NOT_ALPHA_VMAGIC: '\\A' -> type(CLASS_NOT_ALPHA); +CLASS_LCASE_VMAGIC: '\\l' -> type(CLASS_LCASE); +CLASS_NOT_LCASE_VMAGIC: '\\L' -> type(CLASS_NOT_LCASE); +CLASS_UCASE_VMAGIC: '\\u' -> type(CLASS_UCASE); +CLASS_NOT_UCASE_VMAGIC: '\\U' -> type(CLASS_NOT_UCASE); + +CLASS_IDENTIFIER_NL_VMAGIC: '\\_i' -> type(CLASS_IDENTIFIER_NL); 
+CLASS_IDENTIFIER_D_NL_VMAGIC: '\\_I' -> type(CLASS_IDENTIFIER_D_NL); +CLASS_KEYWORD_NL_VMAGIC: '\\_k' -> type(CLASS_KEYWORD_NL); +CLASS_KEYWORD_D_NL_VMAGIC: '\\_K' -> type(CLASS_KEYWORD_D_NL); +CLASS_FILENAME_NL_VMAGIC: '\\_f' -> type(CLASS_FILENAME_NL); +CLASS_FILENAME_D_NL_VMAGIC: '\\_F' -> type(CLASS_FILENAME_D_NL); +CLASS_PRINTABLE_NL_VMAGIC: '\\_p' -> type(CLASS_PRINTABLE_NL); +CLASS_PRINTABLE_D_NL_VMAGIC: '\\_P' -> type(CLASS_PRINTABLE_D_NL); +CLASS_WS_NL_VMAGIC: '\\_s' -> type(CLASS_WS_NL); +CLASS_NOT_WS_NL_VMAGIC: '\\_S' -> type(CLASS_NOT_WS_NL); +CLASS_DIGIT_NL_VMAGIC: '\\_d' -> type(CLASS_DIGIT_NL); +CLASS_NOT_DIGIT_NL_VMAGIC: '\\_D' -> type(CLASS_NOT_DIGIT_NL); +CLASS_HEX_NL_VMAGIC: '\\_x' -> type(CLASS_HEX_NL); +CLASS_NOT_HEX_NL_VMAGIC: '\\_X' -> type(CLASS_NOT_HEX_NL); +CLASS_OCTAL_NL_VMAGIC: '\\_o' -> type(CLASS_OCTAL_NL); +CLASS_NOT_OCTAL_NL_VMAGIC: '\\_O' -> type(CLASS_NOT_OCTAL_NL); +CLASS_WORD_NL_VMAGIC: '\\_w' -> type(CLASS_WORD_NL); +CLASS_NOT_WORD_NL_VMAGIC: '\\_W' -> type(CLASS_NOT_WORD_NL); +CLASS_HEADWORD_NL_VMAGIC: '\\_h' -> type(CLASS_HEADWORD_NL); +CLASS_NOT_HEADWORD_NL_VMAGIC: '\\_H' -> type(CLASS_NOT_HEADWORD_NL); +CLASS_ALPHA_NL_VMAGIC: '\\_a' -> type(CLASS_ALPHA_NL); +CLASS_NOT_ALPHA_NL_VMAGIC: '\\_A' -> type(CLASS_NOT_ALPHA_NL); +CLASS_LCASE_NL_VMAGIC: '\\_l' -> type(CLASS_LCASE_NL); +CLASS_NOT_LCASE_NL_VMAGIC: '\\_L' -> type(CLASS_NOT_LCASE_NL); +CLASS_UCASE_NL_VMAGIC: '\\_u' -> type(CLASS_UCASE_NL); +CLASS_NOT_UCASE_NL_VMAGIC: '\\_U' -> type(CLASS_NOT_UCASE_NL); + +CLASS_ESC_VMAGIC: '\\e' -> type(CLASS_ESC); +CLASS_TAB_VMAGIC: '\\t' -> type(CLASS_TAB); +CLASS_CR_VMAGIC: '\\r' -> type(CLASS_CR); +CLASS_BS_VMAGIC: '\\b' -> type(CLASS_BS); +CLASS_NL_VMAGIC: '\\n' -> type(CLASS_NL); + +// tokens related to changing lexer mode. 
These are only used by the lexer and not sent to the parser +SETMAGIC_VMAGIC: '\\m' -> mode(DEFAULT_MODE), skip; +SETNOMAGIC_VMAGIC: '\\M' -> mode(NO_MAGIC), skip; +SETVMAGIC_VMAGIC: '\\v' -> skip; // already in very magic mode +SETVNOMAGIC_VMAGIC: '\\V' -> mode(V_NO_MAGIC), skip; + +// everything else, either escaped or not, should be taken literally +LITERAL_CHAR_VMAGIC: '\\'? . -> type(LITERAL_CHAR); + +// ------------------------------------------------------------------------------------------------ // +// // +// // +// V_NO_MAGIC - This mode is set when the lexer comes across an \V token // +// // +// // +// ------------------------------------------------------------------------------------------------ // +mode V_NO_MAGIC; +ALTERNATION_VNOMAGIC: '\\|' -> type(ALTERNATION); +AND_VNOMAGIC: '\\&' -> type(AND); +LEFT_PAREN_VNOMAGIC: '\\(' -> type(LEFT_PAREN); +LEFT_PAREN_NOCAPTURE_VNOMAGIC: '\\%(' -> type(LEFT_PAREN_NOCAPTURE); +RIGHT_PAREN_VNOMAGIC: '\\)' -> type(RIGHT_PAREN); +DOT_VNOMAGIC: '\\.' -> type(DOT); +DOTNL_VNOMAGIC: '\\_.' -> type(DOTNL); +BACKREFERENCE_VNOMAGIC: '\\' [0-9] -> type(BACKREFERENCE); +LAST_SUBSTITUTE_VNOMAGIC: '\\~' -> type(LAST_SUBSTITUTE); +DECIMAL_CODE_VNOMAGIC: '\\%d' [0-9]+ -> type(DECIMAL_CODE); +OCTAL_CODE_VNOMAGIC: '\\%o' [0-7] [0-7]? [0-7]? -> type(OCTAL_CODE); +HEXADECIMAL_CODE_VNOMAGIC: '\\%x' [a-fA-F0-9] [a-fA-F0-9]? -> type(HEXADECIMAL_CODE); +UNICODE_CODE_VNOMAGIC: '\\%u' [a-fA-F0-9] [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? -> type(UNICODE_CODE); +WIDE_UNICODE_CODE_VNOMAGIC: '\\%U' [a-fA-F0-9] [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? -> type(WIDE_UNICODE_CODE); + +// multi +STAR_VNOMAGIC: '\\*' -> type(STAR); +PLUS_VNOMAGIC: '\\+' -> type(PLUS); +OPTIONAL_VNOMAGIC: ('\\=' | '\\?') -> type(OPTIONAL); +ATOMIC_VNOMAGIC: '\\@>' -> type(ATOMIC); +POSITIVE_LOOKAHEAD_VNOMAGIC: '\\@=' -> type(POSITIVE_LOOKAHEAD); +NEGATIVE_LOOKAHEAD_VNOMAGIC: '\\@!' 
-> type(NEGATIVE_LOOKAHEAD); +POSITIVE_LOOKBEHIND_VNOMAGIC: '\\@<=' -> type(POSITIVE_LOOKBEHIND); +NEGATIVE_LOOKBEHIND_VNOMAGIC: '\\@<!' -> type(NEGATIVE_LOOKBEHIND); +POSITIVE_LIMITED_LOOKBEHIND_VNOMAGIC: '\\@' [0-9]+ '<=' -> type(POSITIVE_LIMITED_LOOKBEHIND); +NEGATIVE_LIMITED_LOOKBEHIND_VNOMAGIC: '\\@' [0-9]+ '<!' -> type(NEGATIVE_LIMITED_LOOKBEHIND); +RANGE_START_VNOMAGIC: '\\{' -> pushMode(INSIDE_RANGE), type(RANGE_START); +RANGE_START_LAZY_VNOMAGIC: '\\{-' -> pushMode(INSIDE_RANGE), type(RANGE_START_LAZY); + +COLLECTION_START_VNOMAGIC: '\\[' -> pushMode(INSIDE_COLLECTION), type(COLLECTION_START); +COLLECTION_START_EOL_VNOMAGIC: '\\_[' -> pushMode(INSIDE_COLLECTION), type(COLLECTION_START); +OPTIONALLY_MATCHED_START_VNOMAGIC: '\\%[' -> type(OPTIONALLY_MATCHED_START); +OPTIONALLY_MATCHED_END_VNOMAGIC: ']' -> type(OPTIONALLY_MATCHED_END); + +// zero-width tokens +CURSOR_VNOMAGIC: '\\%#' -> type(CURSOR); +START_MATCH_VNOMAGIC: '\\zs' -> type(START_MATCH); +END_MATCH_VNOMAGIC: '\\ze' -> type(END_MATCH); +START_OF_FILE_VNOMAGIC: '\\%^' -> type(START_OF_FILE); +END_OF_FILE_VNOMAGIC: '\\%$' -> type(END_OF_FILE); +START_OF_LINE_VNOMAGIC: '\\_^' -> type(START_OF_LINE); +END_OF_LINE_VNOMAGIC: '\\_$' -> type(END_OF_LINE); +CARET_VNOMAGIC: '\\^' -> type(CARET); +DOLLAR_VNOMAGIC: '\\$' -> type(DOLLAR); +START_OF_WORD_VNOMAGIC: '\\<' -> type(START_OF_WORD); +END_OF_WORD_VNOMAGIC: '\\>' -> type(END_OF_WORD); +VISUAL_VNOMAGIC: '\\%V' -> type(VISUAL); +LINE_VNOMAGIC: '\\%' [0-9]+ 'l' -> type(LINE); +BEFORE_LINE_VNOMAGIC: '\\%<' [0-9]+ 'l' -> type(BEFORE_LINE); +AFTER_LINE_VNOMAGIC: '\\%>' [0-9]+ 'l' -> type(AFTER_LINE); +LINE_CURSOR_VNOMAGIC: '\\%.l' -> type(LINE_CURSOR); +BEFORE_LINE_CURSOR_VNOMAGIC: '\\%<.l' -> type(BEFORE_LINE_CURSOR); +AFTER_LINE_CURSOR_VNOMAGIC: '\\%>.l' -> type(AFTER_LINE_CURSOR); +COLUMN_VNOMAGIC: '\\%' [0-9]+ [cv] -> type(COLUMN); +BEFORE_COLUMN_VNOMAGIC: '\\%<' [0-9]+ [cv] -> type(BEFORE_COLUMN); +AFTER_COLUMN_VNOMAGIC: '\\%>' [0-9]+ [cv] -> type(AFTER_COLUMN);
+COLUMN_CURSOR_VNOMAGIC: '\\%.' [cv] -> type(COLUMN_CURSOR); +BEFORE_COLUMN_CURSOR_VNOMAGIC: '\\%<.' [cv] -> type(BEFORE_COLUMN_CURSOR); +AFTER_COLUMN_CURSOR_VNOMAGIC: '\\%>.' [cv] -> type(AFTER_COLUMN_CURSOR); +MARK_VNOMAGIC: '\\%\'' ([a-zA-Z0-9<>'`"^.(){}] | '[' | ']') -> type(MARK); +BEFORE_MARK_VNOMAGIC: '\\%<\'' ([a-zA-Z0-9<>'`"^.(){}] | '[' | ']') -> type(BEFORE_MARK); +AFTER_MARK_VNOMAGIC: '\\%>\'' ([a-zA-Z0-9<>'`"^.(){}] | '[' | ']') -> type(AFTER_MARK); + +// case-related tokens +IGNORE_CASE_VNOMAGIC: '\\c' { setIgnoreCase(); } -> skip; +NO_IGNORE_CASE_VNOMAGIC: '\\C' { setNoIgnoreCase(); } -> skip; + +// character classes +CLASS_IDENTIFIER_VNOMAGIC: '\\i' -> type(CLASS_IDENTIFIER); +CLASS_IDENTIFIER_D_VNOMAGIC: '\\I' -> type(CLASS_IDENTIFIER_D); +CLASS_KEYWORD_VNOMAGIC: '\\k' -> type(CLASS_KEYWORD); +CLASS_KEYWORD_D_VNOMAGIC: '\\K' -> type(CLASS_KEYWORD_D); +CLASS_FILENAME_VNOMAGIC: '\\f' -> type(CLASS_FILENAME); +CLASS_FILENAME_D_VNOMAGIC: '\\F' -> type(CLASS_FILENAME_D); +CLASS_PRINTABLE_VNOMAGIC: '\\p' -> type(CLASS_PRINTABLE); +CLASS_PRINTABLE_D_VNOMAGIC: '\\P' -> type(CLASS_PRINTABLE_D); +CLASS_WS_VNOMAGIC: '\\s' -> type(CLASS_WS); +CLASS_NOT_WS_VNOMAGIC: '\\S' -> type(CLASS_NOT_WS); +CLASS_DIGIT_VNOMAGIC: '\\d' -> type(CLASS_DIGIT); +CLASS_NOT_DIGIT_VNOMAGIC: '\\D' -> type(CLASS_NOT_DIGIT); +CLASS_HEX_VNOMAGIC: '\\x' -> type(CLASS_HEX); +CLASS_NOT_HEX_VNOMAGIC: '\\X' -> type(CLASS_NOT_HEX); +CLASS_OCTAL_VNOMAGIC: '\\o' -> type(CLASS_OCTAL); +CLASS_NOT_OCTAL_VNOMAGIC: '\\O' -> type(CLASS_NOT_OCTAL); +CLASS_WORD_VNOMAGIC: '\\w' -> type(CLASS_WORD); +CLASS_NOT_WORD_VNOMAGIC: '\\W' -> type(CLASS_NOT_WORD); +CLASS_HEADWORD_VNOMAGIC: '\\h' -> type(CLASS_HEADWORD); +CLASS_NOT_HEADWORD_VNOMAGIC: '\\H' -> type(CLASS_NOT_HEADWORD); +CLASS_ALPHA_VNOMAGIC: '\\a' -> type(CLASS_ALPHA); +CLASS_NOT_ALPHA_VNOMAGIC: '\\A' -> type(CLASS_NOT_ALPHA); +CLASS_LCASE_VNOMAGIC: '\\l' -> type(CLASS_LCASE); +CLASS_NOT_LCASE_VNOMAGIC: '\\L' -> type(CLASS_NOT_LCASE);
+CLASS_UCASE_VNOMAGIC: '\\u' -> type(CLASS_UCASE); +CLASS_NOT_UCASE_VNOMAGIC: '\\U' -> type(CLASS_NOT_UCASE); + +CLASS_IDENTIFIER_NL_VNOMAGIC: '\\_i' -> type(CLASS_IDENTIFIER_NL); +CLASS_IDENTIFIER_D_NL_VNOMAGIC: '\\_I' -> type(CLASS_IDENTIFIER_D_NL); +CLASS_KEYWORD_NL_VNOMAGIC: '\\_k' -> type(CLASS_KEYWORD_NL); +CLASS_KEYWORD_D_NL_VNOMAGIC: '\\_K' -> type(CLASS_KEYWORD_D_NL); +CLASS_FILENAME_NL_VNOMAGIC: '\\_f' -> type(CLASS_FILENAME_NL); +CLASS_FILENAME_D_NL_VNOMAGIC: '\\_F' -> type(CLASS_FILENAME_D_NL); +CLASS_PRINTABLE_NL_VNOMAGIC: '\\_p' -> type(CLASS_PRINTABLE_NL); +CLASS_PRINTABLE_D_NL_VNOMAGIC: '\\_P' -> type(CLASS_PRINTABLE_D_NL); +CLASS_WS_NL_VNOMAGIC: '\\_s' -> type(CLASS_WS_NL); +CLASS_NOT_WS_NL_VNOMAGIC: '\\_S' -> type(CLASS_NOT_WS_NL); +CLASS_DIGIT_NL_VNOMAGIC: '\\_d' -> type(CLASS_DIGIT_NL); +CLASS_NOT_DIGIT_NL_VNOMAGIC: '\\_D' -> type(CLASS_NOT_DIGIT_NL); +CLASS_HEX_NL_VNOMAGIC: '\\_x' -> type(CLASS_HEX_NL); +CLASS_NOT_HEX_NL_VNOMAGIC: '\\_X' -> type(CLASS_NOT_HEX_NL); +CLASS_OCTAL_NL_VNOMAGIC: '\\_o' -> type(CLASS_OCTAL_NL); +CLASS_NOT_OCTAL_NL_VNOMAGIC: '\\_O' -> type(CLASS_NOT_OCTAL_NL); +CLASS_WORD_NL_VNOMAGIC: '\\_w' -> type(CLASS_WORD_NL); +CLASS_NOT_WORD_NL_VNOMAGIC: '\\_W' -> type(CLASS_NOT_WORD_NL); +CLASS_HEADWORD_NL_VNOMAGIC: '\\_h' -> type(CLASS_HEADWORD_NL); +CLASS_NOT_HEADWORD_NL_VNOMAGIC: '\\_H' -> type(CLASS_NOT_HEADWORD_NL); +CLASS_ALPHA_NL_VNOMAGIC: '\\_a' -> type(CLASS_ALPHA_NL); +CLASS_NOT_ALPHA_NL_VNOMAGIC: '\\_A' -> type(CLASS_NOT_ALPHA_NL); +CLASS_LCASE_NL_VNOMAGIC: '\\_l' -> type(CLASS_LCASE_NL); +CLASS_NOT_LCASE_NL_VNOMAGIC: '\\_L' -> type(CLASS_NOT_LCASE_NL); +CLASS_UCASE_NL_VNOMAGIC: '\\_u' -> type(CLASS_UCASE_NL); +CLASS_NOT_UCASE_NL_VNOMAGIC: '\\_U' -> type(CLASS_NOT_UCASE_NL); + +CLASS_ESC_VNOMAGIC: '\\e' -> type(CLASS_ESC); +CLASS_TAB_VNOMAGIC: '\\t' -> type(CLASS_TAB); +CLASS_CR_VNOMAGIC: '\\r' -> type(CLASS_CR); +CLASS_BS_VNOMAGIC: '\\b' -> type(CLASS_BS); +CLASS_NL_VNOMAGIC: '\\n' -> type(CLASS_NL); + +// tokens 
related to changing lexer mode. These are only used by the lexer and not sent to the parser +SETMAGIC_VNOMAGIC: '\\m' -> mode(DEFAULT_MODE), skip; +SETNOMAGIC_VNOMAGIC: '\\M' -> mode(NO_MAGIC), skip; +SETVMAGIC_VNOMAGIC: '\\v' -> mode(V_MAGIC), skip; +SETVNOMAGIC_VNOMAGIC: '\\V' -> skip; // already in very nomagic mode + +// everything else, either escaped or not, should be taken literally +LITERAL_CHAR_VNOMAGIC: '\\'? . -> type(LITERAL_CHAR); + +// ------------------------------------------------------------------------------------------------ // +// // +// // +// The following lexer modes may operate on top of any of the other modes, and are used to handle // +// tokens inside ranges and collections // +// // +// // +// ------------------------------------------------------------------------------------------------ // +// Mode for when inside the range quantifier +mode INSIDE_RANGE; +RANGE_END: ('}' | '\\}') -> popMode; +INT: [0-9]+; +COMMA: ','; + +// Mode for when inside the collection +mode INSIDE_COLLECTION; +COLLECTION_END: ']' -> popMode; +CARET: '^'; +DASH: '-'; +DECIMAL_ESCAPE: '\\d' [0-9] [0-9]+ -> type(COLLECTION_LITERAL_CHAR); +OCTAL_ESCAPE: '\\o' [0-7] [0-7]? [0-7]? -> type(COLLECTION_LITERAL_CHAR); +HEXADECIMAL_ESCAPE: '\\x' [a-fA-F0-9] [a-fA-F0-9]? -> type(COLLECTION_LITERAL_CHAR); +UNICODE_ESCAPE: '\\u' [a-fA-F0-9] [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? -> type(COLLECTION_LITERAL_CHAR); +UNICODE_ESCAPE_WIDE: '\\U' [a-fA-F0-9] [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? [a-fA-F0-9]? 
-> type(COLLECTION_LITERAL_CHAR); +ESC_ESCAPE: '\\e' -> type(COLLECTION_LITERAL_CHAR); +TAB_ESCAPE: '\\t' -> type(COLLECTION_LITERAL_CHAR); +CR_ESCAPE: '\\r' -> type(COLLECTION_LITERAL_CHAR); +BS_ESCAPE: '\\b' -> type(COLLECTION_LITERAL_CHAR); +NL_ESCAPE: '\\n' -> type(COLLECTION_LITERAL_CHAR); +ALNUM_CLASS: '[:alnum:]'; +ALPHA_CLASS: '[:alpha:]'; +BLANK_CLASS: '[:blank:]'; +CNTRL_CLASS: '[:cntrl:]'; +DIGIT_CLASS: '[:digit:]'; +GRAPH_CLASS: '[:graph:]'; +LOWER_CLASS: '[:lower:]'; +PRINT_CLASS: '[:print:]'; +PUNCT_CLASS: '[:punct:]'; +SPACE_CLASS: '[:space:]'; +UPPER_CLASS: '[:upper:]'; +XDIGIT_CLASS: '[:xdigit:]'; +RETURN_CLASS: '[:return:]'; +TAB_CLASS: '[:tab:]'; +ESCAPE_CLASS: '[:escape:]'; +BACKSPACE_CLASS: '[:backspace:]'; +IDENT_CLASS: '[:ident:]'; +KEYWORD_CLASS: '[:keyword:]'; +FNAME_CLASS: '[:fname:]'; +ESCAPED_CHAR: ('\\\\' | '\\-' | '\\^' | '\\]') -> type(COLLECTION_LITERAL_CHAR); +COLLECTION_CHAR: . -> type(COLLECTION_LITERAL_CHAR); \ No newline at end of file diff --git a/vim-engine/src/main/antlr/RegexParser.g4 b/vim-engine/src/main/antlr/RegexParser.g4 new file mode 100644 index 0000000000..c990b889ec --- /dev/null +++ b/vim-engine/src/main/antlr/RegexParser.g4 @@ -0,0 +1,223 @@ +parser grammar RegexParser; + +options { tokenVocab=RegexLexer; } + +/** + * A pattern is a sub_pattern, followed by an end-of-file. + */ +pattern : sub_pattern EOF + ; + +/** + * A sub-pattern is one or more branches, separated by "\|". It matches anything + * that matches one of the branches. Example: "vim\|VIM" matches "vim" and + * matches "VIM". If more than one branch matches, the first one is used. + */ +sub_pattern : (branches+=branch ALTERNATION)* branches+=branch + ; +/** + * A branch is one or more concats, separated by "\&". It matches the last + * concat, but only if all the preceding concats also match at the same + * position. Example: "IdeaVim\&Idea" matches "Idea" in "IdeaVim". + */ +branch : CARET + | DOLLAR + | AND + | CARET?
(concats+=concat AND)* concats+=concat DOLLAR + | CARET? (concats+=concat AND)* concats+=concat + ; + +/** + * A concat is one or more pieces, concatenated. It matches a match for the + * first piece, followed by a match for the second piece, etc. Example: + * "i[0-9]v", first matches "i", then a digit and then "v". + */ +concat : pieces+=piece+ + ; + +/** + * A piece is an atom, possibly followed by a multi, an indication of how many + * times the atom can be matched. Example: "a*" matches any sequence of "a" + * characters: "", "a", "aa", etc. + */ +piece : atom multi? + ; + +/** + * An atom is an ordinary_atom, or a sub_pattern surrounded with parentheses. + * If the left parenthesis is preceded by a %, it doesn't count as a + * capture group. + * + * It can also be a sequence of optionally matched atoms. See :help \%[] + */ +atom : ordinary_atom #OrdinaryAtom + | LEFT_PAREN sub_pattern? RIGHT_PAREN #GroupingCapture + | LEFT_PAREN_NOCAPTURE sub_pattern? RIGHT_PAREN #GroupingNoCapture + | OPTIONALLY_MATCHED_START atoms+=atom* OPTIONALLY_MATCHED_END #OptionallyMatched + ; + +/** + * A multi is an indication of how many times the preceding atom should be + * matched. It can be a "*" for zero or more times, "\+" for one or more times, + * "\?" or "\=" for zero or one times, or a range for a more customizable + * number of times. + */ +multi : STAR #ZeroOrMore + | PLUS #OneOrMore + | OPTIONAL #ZeroOrOne + | range #RangeQuantifier + | ATOMIC #Atomic + | POSITIVE_LOOKAHEAD #PositiveLookahead + | NEGATIVE_LOOKAHEAD #NegativeLookahead + | POSITIVE_LOOKBEHIND #PositiveLookbehind + | NEGATIVE_LOOKBEHIND #NegativeLookbehind + | POSITIVE_LIMITED_LOOKBEHIND #PositiveLimitedLookbehind + | NEGATIVE_LIMITED_LOOKBEHIND #NegativeLimitedLookbehind + ; + +/** + * A range is a custom quantification of the number of times that the + * preceding atom can be matched. It can be a range from a number to another, + * up to a number, more than a number, or an absolute number of times.
+ * Examples: "a\{3,5}" matches any sequence of 3 to 5 "a" characters; + * "a\{,5}" matches any sequence of up to, and including, 5 "a" characters; + * "a\{3,}" matches any sequence of 3 or more "a" characters; + * "a\{4}" matches a sequence of exactly 4 "a" characters; + * "a\{}" matches any sequence of "a" characters. + */ +range : RANGE_START lower_bound=INT? (COMMA upper_bound=INT?)? RANGE_END #RangeGreedy + | RANGE_START_LAZY lower_bound=INT? (COMMA upper_bound=INT?)? RANGE_END #RangeLazy + ; + +/** + * An ordinary_atom can be a single character that matches itself, a token with + * a special meaning, or a collection of characters. + */ +ordinary_atom : (LITERAL_CHAR | CARET | DOLLAR | OPTIONALLY_MATCHED_END) #LiteralChar + | DOT #AnyChar + | DOTNL #AnyCharNL + | BACKREFERENCE #Backreference + | LAST_SUBSTITUTE #LastSubstitute + | zero_width #ZeroWidth + | char_class #CharClass + | collection #Collec + | char_code #CharCode + ; + +/** + * A character class matches any character that is in that class. Example: + * \d matches any digit from 0 to 9. 
+ */ +char_class : (CLASS_IDENTIFIER | CLASS_IDENTIFIER_NL) #Identifier + | (CLASS_IDENTIFIER_D | CLASS_IDENTIFIER_D_NL) #IdentifierNotDigit + | (CLASS_KEYWORD | CLASS_KEYWORD_NL) #Keyword + | (CLASS_KEYWORD_D | CLASS_KEYWORD_D_NL) #KeywordNotDigit + | (CLASS_FILENAME | CLASS_FILENAME_NL) #Filename + | (CLASS_FILENAME_D | CLASS_FILENAME_D_NL) #FilenameNotDigit + | (CLASS_PRINTABLE | CLASS_PRINTABLE_NL) #Printable + | (CLASS_PRINTABLE_D | CLASS_PRINTABLE_D_NL) #PrintableNotDigit + | (CLASS_WS | CLASS_WS_NL) #Whitespace + | (CLASS_NOT_WS | CLASS_NOT_WS_NL) #NotWhitespace + | (CLASS_DIGIT | CLASS_DIGIT_NL) #Digit + | (CLASS_NOT_DIGIT | CLASS_NOT_DIGIT_NL) #NotDigit + | (CLASS_HEX | CLASS_HEX_NL) #Hex + | (CLASS_NOT_HEX | CLASS_NOT_HEX_NL) #NotHex + | (CLASS_OCTAL | CLASS_OCTAL_NL) #Octal + | (CLASS_NOT_OCTAL | CLASS_NOT_OCTAL_NL) #NotOctal + | (CLASS_WORD | CLASS_WORD_NL) #Wordchar + | (CLASS_NOT_WORD | CLASS_NOT_WORD_NL) #Notwordchar + | (CLASS_HEADWORD | CLASS_HEADWORD_NL) #Headofword + | (CLASS_NOT_HEADWORD | CLASS_NOT_HEADWORD_NL) #NotHeadOfWord + | (CLASS_ALPHA | CLASS_ALPHA_NL) #Alpha + | (CLASS_NOT_ALPHA | CLASS_NOT_ALPHA_NL) #NotAlpha + | (CLASS_LCASE | CLASS_LCASE_NL) #Lcase + | (CLASS_NOT_LCASE | CLASS_NOT_LCASE_NL) #NotLcase + | (CLASS_UCASE | CLASS_UCASE_NL) #Ucase + | (CLASS_NOT_UCASE | CLASS_NOT_UCASE_NL) #NotUcase + | CLASS_ESC #Esc + | CLASS_TAB #Tab + | CLASS_CR #CR + | CLASS_BS #BS + | CLASS_NL #NL + ; + +/** + * A collection is a sequence of characters inside square brackets. It + * matches any single character in the collection. If two characters in + * the sequence are separated by '-', this is shorthand for the full list + * of ASCII characters between them. Examples: + * "[abc]" matches either "a", "b", or "c". Equivalent to "a\|b\|c"; + * "[0-9]" matches any digit from 0 to 9; + * "[a-zA-Z]" matches any alphabetic character.
+ */ +collection : COLLECTION_START CARET collection_elems+=collection_elem* COLLECTION_END #CollectionNeg + | COLLECTION_START collection_elems+=collection_elem* COLLECTION_END #CollectionPos + ; + +collection_elem : collection_char_class_expression #CharClassColElem + | start=(COLLECTION_LITERAL_CHAR | DASH | CARET) DASH end=(COLLECTION_LITERAL_CHAR | DASH | CARET) #RangeColElem + | (COLLECTION_LITERAL_CHAR | DASH | CARET) #SingleColElem + ; + +collection_char_class_expression : ALPHA_CLASS #AlphaClass + | ALNUM_CLASS #AlnumClass + | BLANK_CLASS #BlankClass + | CNTRL_CLASS #CntrlClass + | DIGIT_CLASS #DigitClass + | GRAPH_CLASS #GraphClass + | LOWER_CLASS #LowerClass + | PRINT_CLASS #PrintClass + | PUNCT_CLASS #PunctClass + | SPACE_CLASS #SpaceClass + | UPPER_CLASS #UpperClass + | XDIGIT_CLASS #XdigitClass + | RETURN_CLASS #ReturnClass + | TAB_CLASS #TabClass + | ESCAPE_CLASS #EscapeClass + | BACKSPACE_CLASS #BackspaceClass + | IDENT_CLASS #IdentClass + | KEYWORD_CLASS #KeywordClass + | FNAME_CLASS #FnameClass + ; + +/** + * When using zero-width tokens, no characters are + * included in the match. + */ +zero_width : CURSOR #Cursor + | VISUAL #Visual + | START_MATCH #StartMatch + | END_MATCH #EndMatch + | START_OF_FILE #StartOfFile + | END_OF_FILE #EndOfFile + | START_OF_LINE #StartOfLine + | END_OF_LINE #EndOfLine + | START_OF_WORD #StartOfWord + | END_OF_WORD #EndOfWord + | LINE #Line + | BEFORE_LINE #BeforeLine + | AFTER_LINE #AfterLine + | LINE_CURSOR #LineCursor + | BEFORE_LINE_CURSOR #BeforeLineCursor + | AFTER_LINE_CURSOR #AfterLineCursor + | COLUMN #Column + | BEFORE_COLUMN #BeforeColumn + | AFTER_COLUMN #AfterColumn + | COLUMN_CURSOR #ColumnCursor + | BEFORE_COLUMN_CURSOR #BeforeColumnCursor + | AFTER_COLUMN_CURSOR #AfterColumnCursor + | MARK #Mark + | BEFORE_MARK #BeforeMark + | AFTER_MARK #AfterMark + ; + +/** + * Literal characters represented by their code. + * E.g. 
\%d97 matches with the character 'a' + */ +char_code : DECIMAL_CODE #DecimalCode + | OCTAL_CODE #OctalCode + | HEXADECIMAL_CODE #HexCode + | UNICODE_CODE #HexCode + | WIDE_UNICODE_CODE #HexCode + ; diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/action/motion/visual/VisualSelectPreviousAction.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/action/motion/visual/VisualSelectPreviousAction.kt index a1e2aa9b34..31075c0308 100644 --- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/action/motion/visual/VisualSelectPreviousAction.kt +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/action/motion/visual/VisualSelectPreviousAction.kt @@ -31,7 +31,7 @@ public class VisualSelectPreviousAction : VimActionHandler.SingleExecution() { cmd: Command, operatorArguments: OperatorArguments, ): Boolean { - val selectionType = editor.primaryCaret().lastSelectionInfo.type + val selectionType = editor.primaryCaret().lastSelectionInfo.selectionType val caretToSelectionInfo = editor.carets() .map { it to it.lastSelectionInfo } diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/SelectionInfo.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/SelectionInfo.kt new file mode 100644 index 0000000000..719d594b20 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/SelectionInfo.kt @@ -0,0 +1,84 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.api + +import com.maddyhome.idea.vim.common.TextRange +import com.maddyhome.idea.vim.state.mode.Mode +import com.maddyhome.idea.vim.state.mode.SelectionType +import com.maddyhome.idea.vim.state.mode.mode +import kotlin.math.max +import kotlin.math.min + +/** + * Represents information about a text selection, primarily utilized for marking selection boundaries. 
+ * This class is used to track the start and end points of a selection. The values of [start] and [end] can be null, + * indicating situations where either the start or end of a selection mark was removed, or when a single mark was set manually without a real selection. + * It is important to note that [start] and [end] are not necessarily in sequential order. The [start] represents the position where the caret begins, and [end] where it stops. + * This allows for the caret to move in reverse order, thus [start] and [end] are not guaranteed to be in any specific order. + * The [start] property is not the actual start offset of the selection but represents the '<' mark. In the case of line-wise selection, it will be the offset at the start of the line. + * Similarly, the [end] property is not the actual end offset of the selection but represents the '>' mark. For line-wise selections, this will be the offset at the end of the line. + * + * @property start The BufferPosition marking the start of the selection or caret. + * @property end The BufferPosition marking the end of the selection or caret. + * @property selectionType The type of selection being represented (character-wise, line-wise, etc.). + */ +public data class SelectionInfo(public var start: BufferPosition?, public var end: BufferPosition?, public val selectionType: SelectionType) { + /** + * Provides the start and end BufferPositions in sorted order as a Pair. This property ensures a sequential + * order of positions, regardless of the caret movement direction. + */ + public val startEndSorted: Pair<BufferPosition, BufferPosition>? get() = sortBufferPositions(start, end) + + public fun getSelectionRange(editor: VimEditor): TextRange?
{ + val (sortedStart, sortedEnd) = startEndSorted ?: return null + return when (selectionType) { + SelectionType.CHARACTER_WISE -> TextRange(editor.bufferPositionToOffset(sortedStart), editor.bufferPositionToOffset(sortedEnd) + 1) + SelectionType.LINE_WISE -> { + val startOffset = editor.getLineStartOffset(sortedStart.line) + val endOffset = editor.getLineEndOffset(sortedEnd.line, true) + 1 + return TextRange(startOffset, endOffset) + } + SelectionType.BLOCK_WISE -> { + val topLine = sortedStart.line + val bottomLine = sortedEnd.line + val leftColumn = min(sortedStart.column, sortedEnd.column) + val rightColumn = max(sortedStart.column, sortedEnd.column) + + val startOffsets = (topLine .. bottomLine).map { editor.getOffset(it, leftColumn) }.toIntArray() + val endOffsets = (topLine .. bottomLine).map { editor.getOffset(it, rightColumn) + 1 }.toIntArray() + return TextRange(startOffsets, endOffsets) + } + } + } + + public fun isSelected(offset: Int, editor: VimEditor): Boolean { + return getSelectionRange(editor)?.contains(offset) ?: false + } + + private fun sortBufferPositions(pos1: BufferPosition?, pos2: BufferPosition?): Pair<BufferPosition, BufferPosition>? { + if (pos1 == null || pos2 == null) return null + return if (pos1.line != pos2.line) { + if (pos1.line < pos2.line) Pair(pos1, pos2) else Pair(pos2, pos1) + } else { + if (pos1.column < pos2.column) Pair(pos1, pos2) else Pair(pos2, pos1) + } + } + + public companion object { + public fun collectCurrentSelectionInfo(caret: VimCaret): SelectionInfo?
{ + val editor = caret.editor + val mode = editor.mode + + if (mode !is Mode.VISUAL) return null + val start = editor.offsetToBufferPosition(caret.vimSelectionStart) + val end = editor.offsetToBufferPosition(caret.offset.point) + return SelectionInfo(start, end, mode.selectionType) + } + } +} diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimCaret.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimCaret.kt index 2c728aeb16..32a76fb56c 100644 --- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimCaret.kt +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimCaret.kt @@ -162,5 +162,3 @@ public interface CaretRegisterStorage { public fun setKeys(register: Char, keys: List) public fun saveRegister(r: Char, register: Register) } - -public data class SelectionInfo(public val start: BufferPosition?, public val end: BufferPosition?, public val type: SelectionType) diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimMarkServiceBase.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimMarkServiceBase.kt index c51997b826..ed30a819fc 100644 --- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimMarkServiceBase.kt +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimMarkServiceBase.kt @@ -36,6 +36,7 @@ import com.maddyhome.idea.vim.helper.vimStateMachine import com.maddyhome.idea.vim.mark.Jump import com.maddyhome.idea.vim.mark.Mark import com.maddyhome.idea.vim.mark.VimMark +import com.maddyhome.idea.vim.state.mode.Mode import java.lang.Integer.max import java.lang.Integer.min import java.util.* @@ -234,18 +235,17 @@ public abstract class VimMarkServiceBase : VimMarkService { } override fun setVisualSelectionMarks(editor: VimEditor) { - if (!editor.inVisualMode) return - val selectionType = editor.mode.selectionType ?: CHARACTER_WISE - editor.carets() - .forEach { - val start = editor.offsetToBufferPosition(it.vimSelectionStart) - val end = 
editor.offsetToBufferPosition(it.offset.point) - it.lastSelectionInfo = SelectionInfo(start, end, selectionType) - } + val mode = editor.mode + if (mode !is Mode.VISUAL) return + + editor.carets().forEach { + it.lastSelectionInfo = SelectionInfo.collectCurrentSelectionInfo(it) + ?: SelectionInfo(null, null, CHARACTER_WISE) + } } override fun getVisualSelectionMarks(caret: ImmutableVimCaret): TextRange? { - return getMarksRange(caret, SELECTION_START_MARK, SELECTION_END_MARK) + return caret.lastSelectionInfo.getSelectionRange(caret.editor) } override fun getChangeMarks(caret: ImmutableVimCaret): TextRange? { @@ -291,13 +291,13 @@ public abstract class VimMarkServiceBase : VimMarkService { private fun removeSelectionStartMark(caret: ImmutableVimCaret) { val selectionInfo = caret.lastSelectionInfo val startPosition = selectionInfo.start - if (startPosition != null) caret.lastSelectionInfo = SelectionInfo(null, selectionInfo.end, selectionInfo.type) + if (startPosition != null) caret.lastSelectionInfo = SelectionInfo(null, selectionInfo.end, selectionInfo.selectionType) } private fun removeSelectionEndMark(caret: ImmutableVimCaret) { val selectionInfo = caret.lastSelectionInfo val endPosition = selectionInfo.end - if (endPosition != null) caret.lastSelectionInfo = SelectionInfo(selectionInfo.start, null, selectionInfo.type) + if (endPosition != null) caret.lastSelectionInfo = SelectionInfo(selectionInfo.start, null, selectionInfo.selectionType) } override fun removeGlobalMark(char: Char) { @@ -345,7 +345,7 @@ public abstract class VimMarkServiceBase : VimMarkService { } if (newStartPosition != startPosition || newEndPosition != endPosition) { - caret.lastSelectionInfo = SelectionInfo(newStartPosition, newEndPosition, selectionInfo.type) + caret.lastSelectionInfo = SelectionInfo(newStartPosition, newEndPosition, selectionInfo.selectionType) } } } @@ -406,6 +406,10 @@ public abstract class VimMarkServiceBase : VimMarkService { } else { caret.markStorage.clear(caret) } + 
caret.lastSelectionInfo.apply { + start = null + end = null + } } override fun resetAllMarks() { @@ -522,7 +526,7 @@ public abstract class VimMarkServiceBase : VimMarkService { startOffset } ?: return null - if (selectionInfo.type == SelectionType.LINE_WISE) { + if (selectionInfo.selectionType == SelectionType.LINE_WISE) { offset = caret.editor.getLineStartForOffset(offset) } @@ -539,7 +543,7 @@ public abstract class VimMarkServiceBase : VimMarkService { endOffset } ?: return null - if (selectionInfo.type == SelectionType.LINE_WISE) { + if (selectionInfo.selectionType == SelectionType.LINE_WISE) { offset = caret.editor.getLineEndForOffset(offset) } @@ -548,12 +552,12 @@ public abstract class VimMarkServiceBase : VimMarkService { private fun setSelectionStartMark(caret: ImmutableVimCaret, offset: Int) { val selectionInfo = caret.lastSelectionInfo - caret.lastSelectionInfo = SelectionInfo(caret.editor.offsetToBufferPosition(offset), selectionInfo.end, selectionInfo.type) + caret.lastSelectionInfo = SelectionInfo(caret.editor.offsetToBufferPosition(offset), selectionInfo.end, selectionInfo.selectionType) } private fun setSelectionEndMark(caret: ImmutableVimCaret, offset: Int) { val selectionInfo = caret.lastSelectionInfo - caret.lastSelectionInfo = SelectionInfo(selectionInfo.start, caret.editor.offsetToBufferPosition(offset), selectionInfo.type) + caret.lastSelectionInfo = SelectionInfo(selectionInfo.start, caret.editor.offsetToBufferPosition(offset), selectionInfo.selectionType) } private fun createMark(caret: ImmutableVimCaret, char: Char, offset: Int): Mark? 
{ diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimProcessGroup.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimProcessGroup.kt index a714a9c76b..4f02e8c982 100644 --- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimProcessGroup.kt +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimProcessGroup.kt @@ -8,10 +8,13 @@ package com.maddyhome.idea.vim.api import com.maddyhome.idea.vim.command.Command +import com.maddyhome.idea.vim.state.mode.Mode import javax.swing.KeyStroke public interface VimProcessGroup { public val lastCommand: String? + public val isCommandProcessing: Boolean + public val modeBeforeCommandProcessing: Mode? public fun startSearchCommand(editor: VimEditor, context: ExecutionContext, count: Int, leader: Char) public fun endSearchCommand(): String diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimSearchGroup.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimSearchGroup.kt index 5cb308236c..bbf68fb345 100644 --- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimSearchGroup.kt +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimSearchGroup.kt @@ -15,11 +15,53 @@ import com.maddyhome.idea.vim.regexp.CharPointer import com.maddyhome.idea.vim.vimscript.model.VimLContext public interface VimSearchGroup { + + /** + * Last used pattern to perform a search. + */ public var lastSearchPattern: String? + + /** + * Last used pattern to perform a substitution. + */ public var lastSubstitutePattern: String? - public fun findUnderCaret(editor: VimEditor): TextRange? + public fun searchBackward(editor: VimEditor, offset: Int, count: Int): TextRange? + + /** + * Find the range of the next occurrence of the last used search pattern + * + *

Used for the implementation of the gn and gN commands.

+ * + *

Searches for the range of the next occurrence of the last used search pattern. If the current primary + * caret is inside the range of an occurrence, that occurrence is returned. Does not + * update any other state. Direction is explicit, not from state.

+ * + * @param editor The editor to search in + * @param count Find the nth occurrence + * @param forwards Search forwards or backwards + * @return The TextRange of the next occurrence or null if not found + */ public fun getNextSearchRange(editor: VimEditor, count: Int, forwards: Boolean): TextRange? + + /** + * Process the pattern being used as a search range + * + *

Find the next offset of the search pattern, without processing the pattern further. This is not a full search + * pattern, as handled by processSearchCommand. It does not contain a pattern offset and there are not multiple + * patterns separated by `;`. Ranges do support multiple patterns, separation with both `;` and `,` and a `+/-{num}` + * suffix, but these are all handled by the range itself.

+ * + *

This method is essentially a wrapper around SearchHelper.findPattern (via findItOffset) that updates state and + * highlighting.

+ * + * @param editor The editor to search in + * @param pattern The pattern to search for. Does not include leading or trailing `/` and `?` characters + * @param patternOffset The offset applied to the range. Not used during searching, but used to populate lastPatternOffset + * @param startOffset The offset to start searching from + * @param direction The direction to search in + * @return The offset of the match or -1 if not found + */ public fun processSearchRange( editor: VimEditor, pattern: String, @@ -28,10 +70,92 @@ public interface VimSearchGroup { direction: Direction, ): Int + /** + * Find the next occurrence of the last used pattern. + * + *

Searches for the last used pattern, including last used pattern trailing. Direction is the same as the last used direction. + * E.g. `?foo` followed by `n` will search backwards. wrapscan and ignorecase come from options.

+ * + * @param editor The editor to search in + * @param caret Used to get the offset to start searching from + * @param count Find the nth occurrence + * @return The offset of the next match, or -1 if not found + */ public fun searchNext(editor: VimEditor, caret: ImmutableVimCaret, count: Int): Int + + /** + * Find the previous occurrence of the last used pattern. + * + *

Searches for the last used pattern, including last used pattern trailing. Direction is the opposite of the last used direction. + * E.g. `?foo` followed by `N` will search forwards. wrapscan and ignorecase come from options.

+ * + * @param editor The editor to search in + * @param caret Used to get the offset to start searching from + * @param count Find the nth occurrence + * @return The offset of the next match, or -1 if not found + */ public fun searchPrevious(editor: VimEditor, caret: ImmutableVimCaret, count: Int): Int + + /** + * Process the search command, searching for the pattern from the given document offset + * + *

Parses the pattern from the search command and will search for the given pattern, immediately saving the last used + * search pattern. Updates the search register and history and search highlights. Also updates last pattern offset and + * direction. wrapscan and ignorecase come from options. + * + *

Will parse the entire command, including patterns separated by `;`

+ * + *

Note that this method should only be called when the ex command argument should be parsed, and start should be + * updated. I.e. only for the search commands. Consider using SearchHelper.findPattern to find text.

+ * + *

Equivalent to normal.c:nv_search + search.c:do_search

+ * + * @param editor The editor to search in + * @param startOffset The offset to start searching from + * @param command The command text entered into the Ex entry panel. Does not include the leading `/` or `?`. + * Can include a trailing offset, e.g. /{pattern}/{offset}, or multiple commands separated by a semicolon. + * If the pattern is empty, the last used (search? substitute?) pattern (and offset?) is used. + * @param dir The direction to search + * @return Offset to the next occurrence of the pattern or -1 if not found + */ public fun processSearchCommand(editor: VimEditor, command: String, startOffset: Int, dir: Direction): Int + + /** + * Search for the word under the given caret + * + *

Updates last search pattern, last pattern trailing and direction. Ignore smart case is set to true. Highlights + * are updated. wrapscan and ignorecase come from options.

+ * + *

Equivalent to normal.c:nv_ident

+ * + * @param editor The editor to search in + * @param caret The caret to use to look for the current word + * @param count Search for the nth occurrence of the current word + * @param whole Include word boundaries in the search pattern + * @param dir Which direction to search + * @return The offset of the result or the start of the word under the caret if not found. Returns -1 on error + */ public fun searchWord(editor: VimEditor, caret: ImmutableVimCaret, count: Int, whole: Boolean, dir: Direction): Int + + /** + * Parse and execute the substitute command + * + *

Updates state for the last substitute pattern and last replacement text. Updates search + * history and register. Also updates stored substitution flags.

+ * + *

Saves the current location as a jump location and restores caret location after completion. If confirmation is + * enabled and the substitution is abandoned, the current caret location is kept, and the original location is not + * restored.

+ * + *

See ex_cmds.c:ex_substitute

+ * + * @param editor The editor to search in + * @param caret The caret to use for initial search offset, and to move for interactive substitution + * @param range Only search and substitute within the given line range. Must be valid + * @param excmd The command part of the ex command line, e.g. `s` or `substitute`, or `~` + * @param exarg The argument to the substitute command, such as `/{pattern}/{string}/[flags]` + * @return True if the substitution succeeds, false on error. Will succeed even if nothing is modified + */ public fun processSubstituteCommand( editor: VimEditor, caret: VimCaret, @@ -41,21 +165,15 @@ public interface VimSearchGroup { parent: VimLContext, ): Boolean - // TODO rewrite this - public fun search_regcomp(pat: CharPointer?, which_pat: Int, patSave: Int): Pair> public fun findDecimalNumber(line: String): Int? + + /** + * Clears all search highlights. + */ public fun clearSearchHighlight() + /** + * Gets the direction lastly used in a search. + */ public fun getLastSearchDirection(): Direction - - // Matching the values defined in Vim. 
Do not change these values, they are used as indexes - public companion object { - public val RE_SEARCH: Int = 0 // Save/use search pattern - - public val RE_SUBST: Int = 1 // Save/use substitute pattern - - public val RE_BOTH: Int = 2 // Save to both patterns - - public val RE_LAST: Int = 2 // Use last used pattern if "pat" is NULL - } } diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimSearchGroupBase.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimSearchGroupBase.kt index c2743223c8..5eb5946fe6 100644 --- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimSearchGroupBase.kt +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimSearchGroupBase.kt @@ -8,4 +8,1099 @@ package com.maddyhome.idea.vim.api -public abstract class VimSearchGroupBase : VimSearchGroup +import com.maddyhome.idea.vim.common.Direction +import com.maddyhome.idea.vim.common.TextRange +import com.maddyhome.idea.vim.ex.ExException +import com.maddyhome.idea.vim.ex.ranges.LineRange +import com.maddyhome.idea.vim.helper.CharacterHelper +import com.maddyhome.idea.vim.helper.SearchOptions +import com.maddyhome.idea.vim.helper.exitVisualMode +import com.maddyhome.idea.vim.history.HistoryConstants +import com.maddyhome.idea.vim.regexp.CharPointer +import com.maddyhome.idea.vim.regexp.VimRegex +import com.maddyhome.idea.vim.regexp.VimRegexException +import com.maddyhome.idea.vim.regexp.VimRegexOptions +import com.maddyhome.idea.vim.register.RegisterConstants +import com.maddyhome.idea.vim.state.mode.inVisualMode +import com.maddyhome.idea.vim.vimscript.model.VimLContext +import com.maddyhome.idea.vim.vimscript.model.datatypes.VimString +import com.maddyhome.idea.vim.vimscript.model.expressions.Expression +import com.maddyhome.idea.vim.vimscript.model.expressions.SimpleExpression +import org.jetbrains.annotations.TestOnly +import java.text.NumberFormat +import java.text.ParsePosition +import java.util.* +import kotlin.math.max +import kotlin.math.min + 
+public abstract class VimSearchGroupBase : VimSearchGroup { + + protected companion object { + /** + * Last ignore smartcase option. + */ + @JvmStatic + protected var lastIgnoreSmartCase: Boolean = false + + /** + * Last string trailing a pattern. E.g. in '/pattern/e+2', 'e+2' is trailing. + */ + private var lastPatternTrailing: String? = "" + + /** + * Last used search direction. + */ + private var lastDirection: Direction = Direction.FORWARDS + + /** + * The type of the last used pattern. + */ + private var lastPatternType: PatternType? = null + + /** + * Last used substitute string. + */ + private var lastSubstituteString: String? = null + + private val CLASS_NAMES: List = listOf( + "alnum:]", + "alpha:]", + "blank:]", + "cntrl:]", + "digit:]", + "graph:]", + "lower:]", + "print:]", + "punct:]", + "space:]", + "upper:]", + "xdigit:]", + "tab:]", + "return:]", + "backspace:]", + "escape:]", + ) + } + + /** + * Highlights lines startLine to endLine (inclusive), using the last used pattern. + * + * @param editor The editor to highlight. + * @param startLine The number of the line where to start highlighting (inclusive) + * @param endLine The number of the line where to stop highlighting (inclusive) + */ + protected abstract fun highlightSearchLines( + editor: VimEditor, + startLine: Int, + endLine: Int, + ) + + /** + * Updates the current search highlights. + * + * @param force Whether to force this update. + */ + protected abstract fun updateSearchHighlights( + force: Boolean, + ) + + /** + * Reset the search highlights to the last used pattern after highlighting incsearch results. + */ + protected abstract fun resetIncsearchHighlights() + + /** + * Asks the user how to deal with a substitution confirmation choice. + * Used when the 'c' flag is present in a substitute command. + * + * @param editor The editor where the substitution would be made. + * @param match The string that would replace the old one. + * @param caret The current caret. 
+ * @param startOffset The index where the substitution would be made. + */ + protected abstract fun confirmChoice( + editor: VimEditor, + match: String, + caret: VimCaret, + startOffset: Int, + ): ReplaceConfirmationChoice + + /** + * Parses a string representing a Vimscript expression. + * + * @param expressionString A string representing a VimScript expression. + * @return An internal representation of a VimScript expression. + */ + protected abstract fun parseVimScriptExpression( + expressionString: String, + ): Expression? + + /** + * Highlights the string that would be replaced (pending user confimation) in + * a substitute command. + * + * @param editor The editor where the substitution would be made. + * @param startOffset The offset where the highlight should start + * @param endOffset The offset where the highlight should end + */ + protected abstract fun addSubstitutionConfirmationHighlight( + editor: VimEditor, + startOffset: Int, + endOffset: Int, + ) + + /** + * Saves the latest matched string, for Vimscript purposes. + * + * @param match The match to save. + */ + protected abstract fun setLatestMatch( + match: String, + ) + + /** + * Replaces a string in the editor. + * + * @param editor The editor where the replacement is to take place. + * @param startOffset The offset where the string that is to be replaced starts (inclusive). + * @param endOffset The offset where the string that is to be replaced ends (exclusive). + * @param newString The new string that will replace the old one. + */ + protected abstract fun replaceString( + editor: VimEditor, + startOffset: Int, + endOffset: Int, + newString: String, + ) + + /** + * Resets the variable that determines whether search highlights should be shown. + */ + protected abstract fun resetSearchHighlight() + + abstract override fun clearSearchHighlight() + + /** + * Whether to do multiple substitutions in the same line. 'g' flag. 
+ */ + private var doAll = false + + /** + * Whether to ask for confirmation during substitution. 'c' flag. + */ + private var doAsk = false + + /** + * Whether to report errors. 'e' flag. + */ + private var doError = true // if false, ignore errors + + /** + * Whether to ignore case. 'i' or 'I' flags. + * If null means to keep default settings. + */ + private var doIgnorecase: Boolean? = null // ignore case flag + + override var lastSearchPattern: String? = null + override var lastSubstitutePattern: String? = null + + // TODO: this can be made not open and private when SearchGroup.java is removed + /** + * Gets the latest used pattern for search or substitution. + */ + protected open fun getLastUsedPattern(): String? { + return when (lastPatternType) { + PatternType.SEARCH -> lastSearchPattern + PatternType.SUBSTITUTE -> lastSubstitutePattern + else -> null + } + } + + /****************************************************************************/ + /* Search related methods */ + /****************************************************************************/ + + protected fun findUnderCaret( + editor: VimEditor, + ): TextRange? { + val backSearch = searchBackward(editor, editor.primaryCaret().offset.point + 1, 1) ?: return null + return if (backSearch.contains(editor.primaryCaret().offset.point)) backSearch else null + } + + override fun searchBackward( + editor: VimEditor, + offset: Int, + count: Int, + ): TextRange? { + // Backward search returns wrong end offset for some cases. 
That's why we should perform additional forward search + val searchOptions = EnumSet.of(SearchOptions.WRAP, SearchOptions.WHOLE_FILE, SearchOptions.BACKWARDS) + val foundBackward = injector.searchHelper.findPattern(editor, getLastUsedPattern(), offset, count, searchOptions) ?: return null + var startOffset = foundBackward.startOffset - 1 + if (startOffset < 0) startOffset = editor.fileSize().toInt() + searchOptions.remove(SearchOptions.BACKWARDS) + return injector.searchHelper.findPattern(editor, getLastUsedPattern(), startOffset, 1, searchOptions) + } + + override fun getNextSearchRange( + editor: VimEditor, + count: Int, + forwards: Boolean, + ): TextRange? { + editor.removeSecondaryCarets() + var current = findUnderCaret(editor) + + if (current == null || editor.inVisualMode && atEdgeOfGnRange( + current, + editor, + forwards + ) + ) { + current = findNextSearchForGn(editor, count, forwards) + } else if (count > 1) { + current = findNextSearchForGn(editor, count - 1, forwards) + } + return current + } + + override fun processSearchRange( + editor: VimEditor, + pattern: String, + patternOffset: Int, + startOffset: Int, + direction: Direction, + ): Int { + // Will set last pattern, required by findItOffset + // IgnoreSmartCase and Direction are always reset. + // PatternOffset is cleared before searching. ExRanges will add/subtract the line offset from the final search range + // pattern, but we need the value to update lastPatternOffset for future searches. + // TODO: Consider improving pattern offset handling + setLastUsedPattern(pattern, PatternType.SEARCH, true) + lastIgnoreSmartCase = false + lastPatternTrailing = "" // Do not apply a pattern offset yet! 
+ + lastDirection = direction + + resetSearchHighlight() + updateSearchHighlights(true) + + val result = findItOffset(editor, startOffset, 1, lastDirection) + + // Set lastPatternOffset AFTER searching, so it doesn't affect the result + lastPatternTrailing = if (patternOffset != 0) patternOffset.toString() else "" + + return result + } + + override fun searchNext(editor: VimEditor, caret: ImmutableVimCaret, count: Int): Int { + return searchNextWithDirection(editor, caret, count, lastDirection) + } + + override fun searchPrevious(editor: VimEditor, caret: ImmutableVimCaret, count: Int): Int { + return searchNextWithDirection(editor, caret, count, lastDirection.reverse()) + } + override fun processSearchCommand( + editor: VimEditor, + command: String, + startOffset: Int, + dir: Direction, + ): Int { + var isNewPattern = false + var pattern: String? = null + var patternOffset: String? = null + + val type = if (dir === Direction.FORWARDS) '/' else '?' + + if (command.isNotEmpty()) { + if (command[0] != type) { + val endOfPattern = findEndOfPattern(command, type) + pattern = command.substring(0, endOfPattern) + isNewPattern = true + patternOffset = if (endOfPattern < command.length) command.substring(endOfPattern + 1) else "" + } else if (command.length == 1) { + patternOffset = "" + } else { + patternOffset = command.substring(1) + } + } + + if (pattern.isNullOrEmpty()) { + pattern = lastSearchPattern + patternOffset = lastPatternTrailing + if (pattern.isNullOrEmpty()) { + isNewPattern = true + pattern = lastSubstitutePattern + if (pattern.isNullOrEmpty()) { + injector.messages.showStatusBarMessage(null, "E35: No previous regular expression") + return -1 + } + } + } + + // Only update the last pattern with a new input pattern. 
Do not update if we're reusing the last pattern + setLastUsedPattern(pattern, PatternType.SEARCH, isNewPattern) + + lastIgnoreSmartCase = false + lastPatternTrailing = patternOffset // This might include extra search patterns separated by `;` + + lastDirection = dir + + resetSearchHighlight() + updateSearchHighlights(true) + + return findItOffset(editor, startOffset, 1, lastDirection) + } + + override fun searchWord( + editor: VimEditor, + caret: ImmutableVimCaret, + count: Int, + whole: Boolean, + dir: Direction, + ): Int { + val range: TextRange = findWordUnderCursor(editor, caret) ?: return -1 + + val start = range.startOffset + val end = range.endOffset + val pattern: String = if (whole) "\\<${editor.getText(start, end)}\\>" else editor.getText(start, end) + + // Updates last pattern, ready for findItOffset + // Direction is always saved + // IgnoreSmartCase is always set to true + // There is no pattern offset available + setLastUsedPattern(pattern, PatternType.SEARCH, true) + lastIgnoreSmartCase = true + lastPatternTrailing = "" + lastDirection = dir + + resetSearchHighlight() + updateSearchHighlights(true) + + val offset = findItOffset(editor, range.startOffset, count, lastDirection) + return if (offset == -1) range.startOffset else offset + } + + private fun findNextSearchForGn( + editor: VimEditor, + count: Int, + forwards: Boolean, + ): TextRange? 
{ + return if (forwards) { + val searchOptions = EnumSet.of(SearchOptions.WRAP, SearchOptions.WHOLE_FILE) + injector.searchHelper.findPattern( + editor, + getLastUsedPattern(), + editor.primaryCaret().offset.point, + count, + searchOptions + ) + } else { + searchBackward(editor, editor.primaryCaret().offset.point, count) + } + } + + + private fun atEdgeOfGnRange( + nextRange: TextRange, + editor: VimEditor, + forwards: Boolean, + ): Boolean { + val currentPosition: Int = editor.currentCaret().offset.point + return if (forwards) { + nextRange.endOffset - injector.visualMotionGroup.selectionAdj == currentPosition + } else { + nextRange.startOffset == currentPosition + } + } + + private fun searchNextWithDirection( + editor: VimEditor, + caret: ImmutableVimCaret, + count: Int, + dir: Direction, + ): Int { + resetSearchHighlight() + updateSearchHighlights(true) + + val startOffset: Int = caret.offset.point + var offset = findItOffset(editor, startOffset, count, dir) + if (offset == startOffset) { + /* Avoid getting stuck on the current cursor position, which can + * happen when an offset is given and the cursor is on the last char + * in the buffer: Repeat with count + 1. 
*/ + offset = findItOffset(editor, startOffset, count + 1, dir) + } + return offset + } + + private fun findEndOfPattern( + command: String, + delimiter: Char, + startIndex: Int = 0 + ): Int { + var magic = true + + var i = startIndex + while (i < command.length) { + // delimiter found + if (command[i] == delimiter) break + + // collection start found, ignore until end of collection + if (magic && command[i] == '[' || + !magic && command[i] == '\\' && i + 1 < command.length && command[i + 1] == '[') { + + i = findEndOfCollection(command, i) + // skip escaped char + } else if (command[i] == '\\' && i + 1 < command.length) { + i++ + // update magic + if (command[i] == 'v' || command[i] == 'm') magic = true + if (command[i] == 'V' || command[i] == 'M') magic = false + } + i++ + } + return i + } + + private fun findEndOfCollection( + command: String, + startIndex: Int + ): Int { + var i = startIndex + while (i < command.length - 1) { + // collection end found + if (command[i] == ']') break + + // skip escaped char + if (command[i] == '\\' && i + 1 < command.length) i++ + // skip character class + else if (i + 1 < command.length && command[i] == '[' && command[i + 1] < ':') i = findEndOfCharacterClass(command, i + 2) + + i++ + } + return i + } + + private fun findEndOfCharacterClass( + command: String, + startIndex: Int + ): Int { + for (charClass in CLASS_NAMES) { + if (startIndex + charClass.length < command.length && command.substring(startIndex, startIndex + charClass.length) == charClass) + // char class found, skip to end of it + return startIndex + charClass.length - 1 + } + // there wasn't any valid character class + return startIndex + } + + + /** + * Find the word under the cursor or the next word to the right of the cursor on the current line. 
+ * + * @param editor The editor to find the word in + * @param caret The caret to find word under + * @return The text range of the found word or null if there is no word under/after the cursor on the line + */ + private fun findWordUnderCursor( + editor: VimEditor, + caret: ImmutableVimCaret, + ): TextRange? { + + val stop: Int = editor.getLineEndOffset(caret.getBufferPosition().line, true) + val pos: Int = caret.offset.point + + // Technically the first condition is covered by the second one, but let it be + if (editor.text().isEmpty() || editor.text().length <= pos) return null + //if (pos == chars.length() - 1) return new TextRange(chars.length() - 1, chars.length()); + var start = pos + val types = arrayOf( + CharacterHelper.CharacterType.KEYWORD, + CharacterHelper.CharacterType.PUNCTUATION + ) + for (i in 0..1) { + start = pos + val type = CharacterHelper.charType(editor, editor.text()[start], false) + if (type === types[i]) { + // Search back for start of word + while (start > 0 && CharacterHelper.charType(editor, editor.text()[start - 1], false) === types[i]) { + start-- + } + } else { + // Search forward for start of word + while (start < stop && CharacterHelper.charType(editor, editor.text()[start], false) !== types[i]) { + start++ + } + } + if (start != stop) { + break + } + } + if (start == stop) { + return null + } + // Special case 1 character words because 'findNextWordEnd' returns one to many chars + val end: Int = if (start < stop && + (start >= editor.text().length - 1 || + CharacterHelper.charType(editor, editor.text()[start + 1], false) !== CharacterHelper.CharacterType.KEYWORD) + ) { + start + 1 + } else { + injector.searchHelper.findNextWordEnd(editor, start, 1, bigWord = false, spaceWords = false) + 1 + } + return TextRange(start, end) + } + + /****************************************************************************/ + /* Substitute related methods */ + /****************************************************************************/ + 
override fun processSubstituteCommand( + editor: VimEditor, + caret: VimCaret, + range: LineRange, + excmd: String, + exarg: String, + parent: VimLContext, + ): Boolean { + // Explicitly exit visual mode here, so that visual mode marks don't change when we move the cursor to a match. + val exceptions: MutableList = ArrayList() + if (editor.inVisualMode) editor.exitVisualMode() + + // Parse Ex command and arguments to extract the pattern, substitute string, and line range + val substituteCommandParse = parseSubstituteCommand(editor, range, excmd, exarg) ?: return false + val pattern = substituteCommandParse.pattern + val substituteString = substituteCommandParse.substituteString + val line1 = substituteCommandParse.range.startLine + var line2 = substituteCommandParse.range.endLine + + val options: MutableList = ArrayList() + if (injector.globalOptions().smartcase) options.add(VimRegexOptions.SMART_CASE) + if (injector.globalOptions().ignorecase) options.add(VimRegexOptions.IGNORE_CASE) + if (injector.globalOptions().wrapscan) options.add(VimRegexOptions.WRAP_SCAN) + + val regex: VimRegex = try { + VimRegex(pattern) + } catch (e: VimRegexException) { + injector.messages.showStatusBarMessage(editor, e.message) + return false + } + + val hasExpression = substituteString.length >= 2 && substituteString[0] == '\\' && substituteString[1] == '=' + + val oldLastSubstituteString: String = lastSubstituteString ?: "" + lastSubstituteString = substituteString + "" + + resetSearchHighlight() + updateSearchHighlights(true) + + var lastMatchStartOffset = -1 + var gotQuit = false + var column = 0 + var line = line1 + while (line <= line2 && !gotQuit) { + val substituteResult = regex.substitute(editor, substituteString, oldLastSubstituteString, line, column, hasExpression, options) + if (substituteResult == null) { + line++ + column = 0 + continue + } + + injector.jumpService.saveJumpLocation(editor) + val matchRange = substituteResult.first.range + var expression: Expression? 
= null + if (hasExpression) { + val exprString = substituteString.substring(2) + expression = parseVimScriptExpression(exprString) + if (expression == null) { + exceptions.add(ExException("E15: Invalid expression: $exprString")) + expression = SimpleExpression(VimString("")) + } + } + var match = substituteResult.second + lastMatchStartOffset = matchRange.startOffset + + var didReplace = false + if (doAll || line != editor.lineCount()) { + var doReplace = true + if (doAsk) { + addSubstitutionConfirmationHighlight(editor, matchRange.startOffset, matchRange.endOffset) + + val choice: ReplaceConfirmationChoice = confirmChoice(editor, match, caret, matchRange.startOffset) + when (choice) { + ReplaceConfirmationChoice.SUBSTITUTE_THIS -> {} + ReplaceConfirmationChoice.SKIP -> doReplace = false + ReplaceConfirmationChoice.SUBSTITUTE_ALL -> doAsk = false + ReplaceConfirmationChoice.QUIT -> { + doReplace = false + gotQuit = true + } + + ReplaceConfirmationChoice.SUBSTITUTE_LAST -> { + doAll = false + line2 = line + } + } + } + if (doReplace) { + setLatestMatch(editor.getText(TextRange(matchRange.startOffset, matchRange.endOffset))) + caret.moveToOffset(matchRange.startOffset) + if (expression != null) { + match = try { + expression.evaluate(editor, injector.executionContextManager.onEditor(editor, null), parent) + .toInsertableString() + } catch (e: Exception) { + exceptions.add(e as ExException) + "" + } + } + + val endPositionWithoutReplace = editor.offsetToBufferPosition(matchRange.endOffset) + + // FIXME: if we received an instance of MutableVimEditor this method might not be necessary + replaceString(editor, matchRange.startOffset, matchRange.endOffset, match) + didReplace = true + + val endPositionWithReplace = editor.offsetToBufferPosition(matchRange.startOffset + match.length) + line += endPositionWithReplace.line - endPositionWithoutReplace.line + line2 += endPositionWithReplace.line - endPositionWithoutReplace.line + } + } + + if (doAll && matchRange.startOffset 
!= matchRange.endOffset) { + if (didReplace) { + // if there was a replacement, we start next search from where the new string ends + val endPosition = editor.offsetToBufferPosition(matchRange.startOffset + match.length) + line = endPosition.line + column = endPosition.column + } else { + // no replacement, so start next search where the match ended + val endPosition = editor.offsetToVisualPosition(matchRange.endOffset) + column = endPosition.column + } + } else { + column = 0 + line++ + } + } + + if (!gotQuit) { + if (lastMatchStartOffset != -1) { + caret.moveToOffset( + injector.motion.moveCaretToLineStartSkipLeading(editor, editor.offsetToBufferPosition(lastMatchStartOffset).line) + ) + } else { + injector.messages.showStatusBarMessage(null, "E486: Pattern not found: $pattern") + } + } + + setLatestMatch("") + + // todo throw multiple exceptions at once + if (exceptions.isNotEmpty()) { + injector.messages.indicateError() + injector.messages.showStatusBarMessage(null, exceptions[0].toString()) + } + + // TODO: Support reporting number of changes (:help 'report') + return true + } + + private fun parseSubstituteCommand( + editor: VimEditor, + range: LineRange, + excmd: String, + exarg: String + ): SubstituteCommandArguments? { + var patternType = if ("~" == excmd) { + // use last used regexp + lastPatternType + } else { + PatternType.SUBSTITUTE // use last substitute regexp + } + + var pattern: String? = "" + val sub: String + val delimiter: Char + var trailingOptionsStartIndex = 0 + // new pattern and substitution + if (excmd[0] == 's' && exarg.isNotEmpty() && !exarg.first().isWhitespace() && !"0123456789cegriIp|\"".contains(exarg.first())) { + // don't accept alphanumeric for separator + if (exarg.first().isLetter()) { + injector.messages.showStatusBarMessage(null, "E146: Regular expressions can't be delimited by letters") + return null + } + + /* + * undocumented vi feature: + * "\/sub/" and "\?sub?" use last used search pattern (almost like + * //sub/r). 
"\&sub&" use last substitute pattern (like //sub/). + */ + var substituteStringStartIndex = 0 + if (exarg.first() == '\\') { + if (exarg.length < 2 || !"/?&".contains(exarg[1])) { + injector.messages.showStatusBarMessage(null, "E10: \\ should be followed by /, ? or &") + return null + } + if (exarg[1] != '&') { + patternType = PatternType.SEARCH // use last search pattern + } + delimiter = exarg[1] // remember delimiter character + substituteStringStartIndex += 2 + } else { + // find the end of the regexp + patternType = lastPatternType // use last used regexp + delimiter = exarg.first() // remember delimiter character + val endOfPattern = findEndOfPattern(exarg, delimiter, 1) + pattern = exarg.substring(1, endOfPattern) + if (pattern.isEmpty()) pattern = null + substituteStringStartIndex = endOfPattern + if (endOfPattern < exarg.length && exarg[endOfPattern] == delimiter) substituteStringStartIndex++ + } + + /* + * Small incompatibility: vi sees '\n' as end of the command, but in + * Vim we want to use '\n' to find/substitute a NUL. + */ + val tmpSub = exarg.substring(substituteStringStartIndex) // remember the start of the substitution + var substituteStringEndIndex = tmpSub.length + trailingOptionsStartIndex = substituteStringStartIndex + substituteStringEndIndex + for (i in tmpSub.indices) { + if (tmpSub[i] == delimiter && (i == 0 || tmpSub[i - 1] != '\\')) { + substituteStringEndIndex = i + trailingOptionsStartIndex = substituteStringStartIndex + substituteStringEndIndex + 1 + break + } + } + sub = tmpSub.substring(0, substituteStringEndIndex) + + } else { + // use previous pattern and substitution + if (lastSubstituteString == null) { + // there is no previous command + injector.messages.showStatusBarMessage(null, "E33: No previous substitute regular expression") + return null + } + pattern = null + sub = lastSubstituteString!! + "" + } + + // Find trailing options. When '&' is used, keep old options. 
+ if (trailingOptionsStartIndex < exarg.length && exarg[trailingOptionsStartIndex] == '&') { + trailingOptionsStartIndex++ + } else { + // :h :&& - "Note that :s and :& don't keep the flags" + doAll = injector.options(editor).gdefault + doAsk = false + doError = true + doIgnorecase = null + } + var trailingOptionsEndIndex: Int = trailingOptionsStartIndex + for (i in trailingOptionsStartIndex until exarg.length) { + /* + * Note that 'g' and 'c' are always inverted, also when p_ed is off. + * 'r' is never inverted. + */ + if (exarg[i] == 'g') { + doAll = !doAll + } else if (exarg[i] == 'c') { + doAsk = !doAsk + } else if (exarg[i] == 'e') { + doError = !doError + } else if (exarg[i] == 'r') { + // use last used regexp + patternType = lastPatternType + } else if (exarg[i] == 'i') { + // ignore case + doIgnorecase = true + } else if (exarg[i] == 'I') { + // don't ignore case + doIgnorecase = false + } else if (exarg[i] != 'p' && exarg[i] != 'l' && exarg[i] != '#' && exarg[i] != 'n') { + // TODO: Support printing last changed line, with options for line number/list format + // TODO: Support 'n' to report number of matches without substituting + break + } + trailingOptionsEndIndex++ + } + + var line1 = range.startLine + var line2 = range.endLine + + if (line1 < 0 || line2 < 0) { + return null + } + + // check for a trailing count + for (i in trailingOptionsEndIndex until exarg.length) if (exarg[i].isWhitespace()) trailingOptionsEndIndex++ + if (trailingOptionsEndIndex < exarg.length && exarg[trailingOptionsEndIndex].isDigit()) { + var count = 0 + while (trailingOptionsEndIndex < exarg.length && exarg[trailingOptionsEndIndex].isDigit()) { + count = count * 10 + exarg[trailingOptionsEndIndex].digitToInt() + trailingOptionsEndIndex++ + } + if (count <= 0 && doError) { + injector.messages.showStatusBarMessage(null, "Zero count") + return null + } + line1 = line2 + line2 = editor.normalizeLine(line1 + count - 1) + } + + // check for trailing command or garbage + if 
(trailingOptionsEndIndex < exarg.length && exarg[trailingOptionsEndIndex] != '"') { + // if not end-of-line or comment + injector.messages.showStatusBarMessage(null, "Trailing characters") + return null + } + + // check for trailing command or garbage + if (trailingOptionsEndIndex < exarg.length && exarg[trailingOptionsEndIndex] != '"') { + // if not end-of-line or comment + injector.messages.showStatusBarMessage(null, "Trailing characters") + return null + } + + + var isNewPattern = true + if (pattern == null) { + isNewPattern = false + val errorMessage: String? = when (patternType) { + PatternType.SEARCH -> { + pattern = lastSearchPattern + "E33: No previous substitute regular expression" + } + + PatternType.SUBSTITUTE -> { + pattern = lastSubstitutePattern + "E35: No previous regular expression" + } + else -> null + } + + // Pattern was never defined + if (pattern == null) { + injector.messages.showStatusBarMessage(null, errorMessage) + return null + } + } + + // Set last substitute pattern, but only for explicitly typed patterns. Reused patterns are not saved/updated + setLastUsedPattern(pattern, PatternType.SUBSTITUTE, isNewPattern) + + // Always reset after checking, only set for nv_ident + lastIgnoreSmartCase = false + + // TODO: allow option to force (no)ignore case in a better way + pattern = when (doIgnorecase) { + true -> "\\c$pattern" + false -> "\\C$pattern" + null -> pattern + } + + return SubstituteCommandArguments( + pattern, + sub, + LineRange(line1, line2) + ) + } + /****************************************************************************/ + /* Helper methods */ + /****************************************************************************/ + + private fun setLastUsedPattern( + pattern: String, + patternType: PatternType, + isNewPattern: Boolean, + ) { + // Only update the last pattern with a new input pattern. 
Do not update if we're reusing the last pattern + if (isNewPattern) { + when (patternType) { + PatternType.SEARCH -> { + lastSearchPattern = pattern + lastPatternType = PatternType.SEARCH + } + PatternType.SUBSTITUTE -> { + lastSubstitutePattern = pattern + lastPatternType = PatternType.SUBSTITUTE + } + } + } + + // Vim never actually sets this register, but looks it up on request + injector.registerGroup.storeTextSpecial(RegisterConstants.LAST_SEARCH_REGISTER, pattern) + + // This will remove an existing entry and add it back to the end, and is expected to do so even if the string value + // is the same + injector.historyGroup.addEntry(HistoryConstants.SEARCH, pattern) + + } + + override fun findDecimalNumber(line: String): Int? { + val regex = Regex("\\d+") + val range = regex.find(line)?.range ?: return null + + return line.substring(range.first, range.last + 1).toInt() + } + + override fun getLastSearchDirection(): Direction { + return lastDirection + } + + /** + * Searches for the last saved pattern, applying the last saved pattern trailing. Will loop over trailing search + * commands. + * + * Make sure that lastPatternOffset has been updated before calling this. wrapscan and ignorecase come from options. 
+ * + * + * See search.c:do_search (and a little bit of normal.c:normal_search) + * + * @param editor The editor to search in + * @param startOffset The offset to search from + * @param count Find the nth occurrence + * @param dir The direction to search in + * @return The offset to the occurrence or -1 if not found + */ + private fun findItOffset( + editor: VimEditor, + startOffset: Int, + count: Int, + dir: Direction, + ): Int { + var startOffsetMutable = startOffset + var offset = 0 + var offsetIsLineOffset = false + var hasEndOffset = false + var pp = ParsePosition(0) + if (lastPatternTrailing!!.isNotEmpty()) { + if (Character.isDigit(lastPatternTrailing!![0]) || lastPatternTrailing!![0] == '+' || lastPatternTrailing!![0] == '-') { + offsetIsLineOffset = true + if (lastPatternTrailing == "+") { + offset = 1 + } else if (lastPatternTrailing == "-") { + offset = -1 + } else { + if (lastPatternTrailing!![0] == '+') { + lastPatternTrailing = lastPatternTrailing!!.substring(1) + } + val nf = NumberFormat.getIntegerInstance() + pp = ParsePosition(0) + val num = nf.parse(lastPatternTrailing, pp) + if (num != null) { + offset = num.toInt() + } + } + } else if ("ebs".indexOf(lastPatternTrailing!![0]) != -1) { + if (lastPatternTrailing!!.length >= 2) { + if ("+-".indexOf(lastPatternTrailing!![1]) != -1) { + offset = 1 + } + val nf = NumberFormat.getIntegerInstance() + pp = ParsePosition(if (lastPatternTrailing!![1] == '+') 2 else 1) + val num = nf.parse(lastPatternTrailing, pp) + if (num != null) { + offset = num.toInt() + } + } + hasEndOffset = lastPatternTrailing!![0] == 'e' + } + } + + /* + * If there is a character offset, subtract it from the current + * position, so we don't get stuck at "?pat?e+2" or "/pat/s-2". + * Skip this if pos.col is near MAXCOL (closed fold). + * This is not done for a line offset, because then we would not be vi + * compatible. 
+ */ + if (!offsetIsLineOffset && offset != 0) { + startOffsetMutable = + max(0, min((startOffsetMutable - offset), (editor.text().length - 1))) + } + val searchOptions = EnumSet.of(SearchOptions.SHOW_MESSAGES, SearchOptions.WHOLE_FILE) + if (dir === Direction.BACKWARDS) searchOptions.add(SearchOptions.BACKWARDS) + if (lastIgnoreSmartCase) searchOptions.add(SearchOptions.IGNORE_SMARTCASE) + if (hasEndOffset) searchOptions.add(SearchOptions.WANT_ENDPOS) + + // Uses last pattern. We know this is always set before being called + val range = injector.searchHelper.findPattern(editor, getLastUsedPattern(), startOffsetMutable, count, searchOptions) ?: return -1 + + var res = range.startOffset + if (offsetIsLineOffset) { + val line: Int = editor.offsetToBufferPosition(range.startOffset).line + val newLine: Int = editor.normalizeLine(line + offset) + + // TODO: Don't move the caret! + res = injector.motion.moveCaretToLineStart(editor, newLine) + } else if (hasEndOffset || offset != 0) { + val base = if (hasEndOffset) range.endOffset - 1 else range.startOffset + res = max(0, min((base + offset), (editor.text().length - 1))) + } + var ppos = pp.index + if (ppos < lastPatternTrailing!!.length - 1 && lastPatternTrailing!![ppos] == ';') { + val nextDir: Direction = if (lastPatternTrailing!![ppos + 1] == '/') { + Direction.FORWARDS + } else if (lastPatternTrailing!![ppos + 1] == '?') { + Direction.BACKWARDS + } else { + return res + } + if (lastPatternTrailing!!.length - ppos > 2) { + ppos++ + } + res = processSearchCommand(editor, lastPatternTrailing!!.substring(ppos + 1), res, nextDir) + } + return res + } + + /** + * Sets the last search state, purely for tests + * + * @param pattern The pattern to save. This is the last search pattern, not the last substitute pattern + * @param patternOffset The pattern offset, e.g. 
`/{pattern}/{offset}` + * @param direction The direction to search + */ + @TestOnly + public fun setLastSearchState( + pattern: String, + patternOffset: String, + direction: Direction, + ) { + setLastUsedPattern(pattern, PatternType.SEARCH, true) + lastIgnoreSmartCase = false + lastPatternTrailing = patternOffset + lastDirection = direction + } + + /** + * Resets the last state, purely for tests + */ + @TestOnly + public open fun resetState() { + lastPatternType = PatternType.SEARCH + lastSubstitutePattern = null + lastSearchPattern = null + lastPatternTrailing = "" + lastIgnoreSmartCase = false + lastDirection = Direction.FORWARDS + } + + + protected enum class PatternType { + SEARCH, + SUBSTITUTE, + } + + protected enum class ReplaceConfirmationChoice { + SUBSTITUTE_THIS, + SKIP, + SUBSTITUTE_ALL, + QUIT, + SUBSTITUTE_LAST, + } +} + +private data class SubstituteCommandArguments( + val pattern: String, + val substituteString: String, + val range: LineRange, +) \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimSearchHelper.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimSearchHelper.kt index 53b342d12b..b64d036957 100644 --- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimSearchHelper.kt +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimSearchHelper.kt @@ -10,6 +10,7 @@ package com.maddyhome.idea.vim.api import com.maddyhome.idea.vim.common.TextRange import com.maddyhome.idea.vim.helper.SearchOptions +import org.jetbrains.annotations.NotNull import java.util.* public interface VimSearchHelper { @@ -40,7 +41,7 @@ public interface VimSearchHelper { editor: VimEditor, caret: ImmutableVimCaret, type: Char, - dir: Int, + direction: Int, count: Int, ): Int @@ -129,6 +130,18 @@ public interface VimSearchHelper { */ public fun findNextWordEnd(editor: VimEditor, searchFrom: Int, count: Int, bigWord: Boolean, spaceWords: Boolean): Int + /** + * Find text matching the given pattern. 
+ * + * @see :help /pattern + * + * @param editor The editor to search in + * @param pattern The pattern to search for + * @param startOffset The offset to start searching from + * @param count Find the nth next occurrence of the pattern. Must be 1 or greater. + * @param searchOptions A set of options, such as direction and wrap + * @return A TextRange representing the result, or null + */ public fun findPattern( editor: VimEditor, pattern: String?, @@ -137,6 +150,24 @@ public interface VimSearchHelper { searchOptions: EnumSet?, ): TextRange? + /** + * Find all occurrences of the pattern. + * + * @param editor The editor to search in + * @param pattern The pattern to search for + * @param startLine The start line of the range to search for + * @param endLine The end line of the range to search for, or -1 for the whole document + * @param ignoreCase Case sensitive or insensitive searching + * @return A list of TextRange objects representing the results + */ + public fun findAll( + editor: VimEditor, + pattern: String, + startLine: Int, + endLine: Int, + ignoreCase: Boolean + ): List + public fun findNextCharacterOnLine( editor: VimEditor, caret: ImmutableVimCaret, @@ -190,6 +221,17 @@ public interface VimSearchHelper { isOuter: Boolean, ): TextRange? + /** + * Find block enclosing the caret + * + * @param editor The editor to search in + * @param caret The caret currently at + * @param type The type of block, e.g. 
(, [, {, < + * @param count Find the nth next occurrence of the block + * @param isOuter Control whether the match includes block character + * @return When block is found, return text range matching where end offset is exclusive, + * otherwise return null + */ public fun findBlockRange( editor: VimEditor, caret: ImmutableVimCaret, diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimSearchHelperBase.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimSearchHelperBase.kt index db4d489238..5f1960dfcb 100644 --- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimSearchHelperBase.kt +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/VimSearchHelperBase.kt @@ -13,8 +13,14 @@ import com.maddyhome.idea.vim.common.TextRange import com.maddyhome.idea.vim.diagnostic.vimLogger import com.maddyhome.idea.vim.helper.CharacterHelper import com.maddyhome.idea.vim.helper.CharacterHelper.charType +import com.maddyhome.idea.vim.helper.SearchOptions +import com.maddyhome.idea.vim.regexp.VimRegex +import com.maddyhome.idea.vim.regexp.VimRegexException +import com.maddyhome.idea.vim.regexp.VimRegexOptions +import com.maddyhome.idea.vim.regexp.match.VimMatchResult import org.jetbrains.annotations.Contract import org.jetbrains.annotations.Range +import java.util.* import kotlin.math.abs import kotlin.math.min @@ -46,13 +52,87 @@ public abstract class VimSearchHelperBase : VimSearchHelper { return doFindNext(editor, searchFrom, count, bigWord, spaceWords, ::findNextWordEndOne) } + override fun findPattern( + editor: VimEditor, + pattern: String?, + startOffset: Int, + count: Int, + searchOptions: EnumSet?, + ): TextRange? 
{ + if (pattern.isNullOrEmpty()) return null + + val dir = if (searchOptions!!.contains(SearchOptions.BACKWARDS)) Direction.BACKWARDS else Direction.FORWARDS + + val options: MutableList = mutableListOf() + if (injector.globalOptions().smartcase && !searchOptions.contains(SearchOptions.IGNORE_SMARTCASE)) options.add(VimRegexOptions.SMART_CASE) + if (injector.globalOptions().ignorecase) options.add(VimRegexOptions.IGNORE_CASE) + if (injector.globalOptions().wrapscan) options.add(VimRegexOptions.WRAP_SCAN) + if (searchOptions.contains(SearchOptions.WANT_ENDPOS)) options.add(VimRegexOptions.WANT_END_POSITION) + + val regex = try { + VimRegex(pattern) + } catch (e: VimRegexException) { + injector.messages.showStatusBarMessage(editor, e.message) + return null + } + + var result = + if (dir === Direction.FORWARDS) regex.findNext(editor, startOffset, options) + else regex.findPrevious(editor, startOffset, options) + + if (result is VimMatchResult.Failure) { + injector.messages.showStatusBarMessage(editor, "Pattern not found: $pattern") + return null + } + + for (i in 1 until count) { + val nextOffset = (result as VimMatchResult.Success).range.startOffset + result = + if (dir === Direction.FORWARDS) regex.findNext(editor, nextOffset, options) + else regex.findPrevious(editor, nextOffset, options) + } + + return if (result is VimMatchResult.Success) { + result.range + } else { + injector.messages.showStatusBarMessage(editor, "Pattern not found: $pattern") + null + } + } + + override fun findAll( + editor: VimEditor, + pattern: String, + startLine: Int, + endLine: Int, + ignoreCase: Boolean, + ): List { + val options: MutableList = mutableListOf() + if (injector.globalOptions().smartcase) options.add(VimRegexOptions.SMART_CASE) + if (injector.globalOptions().ignorecase) options.add(VimRegexOptions.IGNORE_CASE) + val regex = try { + // TODO: find better way to this with this force ignore case + val newPattern = (if (ignoreCase) "\\c" else "\\C") + pattern + 
VimRegex(newPattern) + } catch (e: VimRegexException) { + injector.messages.showStatusBarMessage(editor, e.message) + return emptyList() + } + return regex.findAll( + editor, + editor.getLineStartOffset(startLine), + editor.getLineEndOffset(if (endLine == -1) editor.lineCount() - 1 else endLine) + 1, + options + ).map { it.range } + } + private fun doFindNext( editor: VimEditor, searchFrom: Int, countDirection: Int, bigWord: Boolean, spaceWords: Boolean, - action: (VimEditor, pos: Int, size: Int, step: Int, bigWord: Boolean, spaceWords: Boolean) -> Int + action: (VimEditor, pos: Int, size: Int, step: Int, bigWord: Boolean, spaceWords: Boolean) -> Int, ): Int { var count = countDirection val step = if (count >= 0) 1 else -1 diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/stubs/VimProcessGroupStub.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/stubs/VimProcessGroupStub.kt index a3d291d33d..baebd1ac3d 100644 --- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/stubs/VimProcessGroupStub.kt +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/api/stubs/VimProcessGroupStub.kt @@ -13,6 +13,7 @@ import com.maddyhome.idea.vim.api.VimEditor import com.maddyhome.idea.vim.api.VimProcessGroupBase import com.maddyhome.idea.vim.command.Command import com.maddyhome.idea.vim.diagnostic.vimLogger +import com.maddyhome.idea.vim.state.mode.Mode import javax.swing.KeyStroke public class VimProcessGroupStub : VimProcessGroupBase() { @@ -22,6 +23,10 @@ public class VimProcessGroupStub : VimProcessGroupBase() { override val lastCommand: String get() = TODO("Not yet implemented") + override val isCommandProcessing: Boolean + get() = TODO("Not yet implemented") + override val modeBeforeCommandProcessing: Mode? 
+ get() = TODO("Not yet implemented") override fun startSearchCommand(editor: VimEditor, context: ExecutionContext, count: Int, leader: Char) { TODO("Not yet implemented") diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/common/TextRange.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/common/TextRange.kt index 95c9b76dcd..b497528398 100644 --- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/common/TextRange.kt +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/common/TextRange.kt @@ -73,7 +73,9 @@ public data class TextRange(public val startOffsets: IntArray, public val endOff return true } - public operator fun contains(offset: Int): Boolean = if (isMultiple) false else offset in startOffset until endOffset + public operator fun contains(offset: Int): Boolean { + return (0 until size()).any { offset in startOffsets[it] until endOffsets[it] } + } override fun toString(): String { @NonNls val sb = StringBuilder() diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/CharPointer.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/CharPointer.kt index bb65924022..a10de98513 100644 --- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/CharPointer.kt +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/CharPointer.kt @@ -10,6 +10,7 @@ package com.maddyhome.idea.vim.regexp import java.nio.CharBuffer import java.util.* +@Deprecated("Remove once old regex engine is removed") public class CharPointer { private var seq: CharSequence private var pointer = 0 diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/CharacterClasses.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/CharacterClasses.kt index 89b5edbedc..98170bd35f 100644 --- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/CharacterClasses.kt +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/CharacterClasses.kt @@ -9,6 +9,7 @@ package com.maddyhome.idea.vim.regexp import 
org.jetbrains.annotations.NonNls +@Deprecated("Remove once old regex engine is removed") public object CharacterClasses { public val CLASS_NAMES: @NonNls Array = arrayOf( "alnum:]", diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/Flags.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/Flags.kt index a9cd9eab3c..d8058ea135 100644 --- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/Flags.kt +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/Flags.kt @@ -7,6 +7,7 @@ */ package com.maddyhome.idea.vim.regexp +@Deprecated("Remove once old regex engine is removed") public class Flags { private var flags: Int diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/Magic.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/Magic.kt index 587d7f6e06..ba9138dc43 100644 --- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/Magic.kt +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/Magic.kt @@ -7,6 +7,7 @@ */ package com.maddyhome.idea.vim.regexp +@Deprecated("Remove once old regex engine is removed") public object Magic { public const val AMP: Int = '&'.code - 256 public const val AT: Int = '@'.code - 256 diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/VimRegex.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/VimRegex.kt new file mode 100644 index 0000000000..72165bfad8 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/VimRegex.kt @@ -0,0 +1,513 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. 
+ */ + +package com.maddyhome.idea.vim.regexp + +import com.maddyhome.idea.vim.api.VimEditor +import com.maddyhome.idea.vim.regexp.engine.VimRegexEngine +import com.maddyhome.idea.vim.regexp.engine.nfa.NFA +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.DotMatcher +import com.maddyhome.idea.vim.regexp.match.VimMatchResult +import com.maddyhome.idea.vim.regexp.parser.CaseSensitivitySettings +import com.maddyhome.idea.vim.regexp.parser.VimRegexParser +import com.maddyhome.idea.vim.regexp.parser.VimRegexParserResult +import com.maddyhome.idea.vim.regexp.parser.visitors.PatternVisitor + +/** + * Represents a compiled Vim pattern. Provides methods to + * match, replace and split strings in the editor with a pattern. + * + * @see :help /pattern + * + */ +public class VimRegex(pattern: String) { + /** + * TODO: in my opinion only the find() and findAll() methods are necessary. + * + * The replace methods (not present here) should probably be implemented + * somewhere else, using the find() or findAll() methods. 
+ * + * The rest of the methods are just useless in my opinion + */ + + /** + * Case sensitivity settings determined by the parser + */ + private val caseSensitivitySettings: CaseSensitivitySettings + + /** + * The NFA representing the compiled pattern + */ + private val nfa: NFA + + /** + * The NFA representing the compiled pattern, preceded by anything + * Equivalent to ".*pattern" + */ + private val nonExactNFA: NFA + + /** + * Whether the pattern contains any upper case literal character + */ + private val hasUpperCase: Boolean + + init { + val parseResult = VimRegexParser.parse(pattern) + + when (parseResult) { + is VimRegexParserResult.Failure -> throw VimRegexException(parseResult.errorCode.toString()) + is VimRegexParserResult.Success -> { + nfa = PatternVisitor.visit(parseResult.tree) + hasUpperCase = PatternVisitor.hasUpperCase + nonExactNFA = NFA.fromMatcher(DotMatcher(false)).closure(false).concatenate(nfa) + caseSensitivitySettings = parseResult.caseSensitivitySettings + } + } + } + + /** + * Indicates whether the pattern can find at least one match in the specified editor + * + * @param editor The editor where to look for the match in + * + * @return True if any match was found, false otherwise + */ + public fun containsMatchIn( + editor: VimEditor, + options: List = emptyList() + ): Boolean { + for (line in 0 until editor.lineCount()) { + val result = simulateNonExactNFA(editor, editor.getLineStartOffset(line), options) + if (result is VimMatchResult.Success) return true + } + + /** + * Entire editor was searched, but no match found + */ + return false + } + + /** + * Returns the first match of a pattern in the editor, that comes after the startIndex + * + * @param editor The editor where to look for the match in + * @param startIndex The index to start the find + * + * @return The first match found in the editor after startIndex + */ + public fun findNext( + editor: VimEditor, + startIndex: Int = 0, + options: List = emptyList() + ): VimMatchResult 
{ + /* + if the startIndex is at the end of a line, start searching at the next position, + to avoid the cursor getting stuck at line ends + */ + val newStartIndex = + if (startIndex + 1 == editor.getLineEndOffset(editor.offsetToBufferPosition(startIndex).line)) startIndex + 1 + else startIndex + + val lineStartIndex = editor.getLineStartOffset(editor.offsetToBufferPosition(newStartIndex).line) + var index = lineStartIndex + while (index <= editor.text().length) { + val result = simulateNonExactNFA(editor, index, options) + index = when (result) { + is VimMatchResult.Success -> { + // the match comes after the startIndex, return it + if (result.range.startOffset > newStartIndex) return result + // there is a match but starts before the startIndex, try again starting from the end of this match + else result.range.endOffset + if (result.range.startOffset == result.range.endOffset) 1 else 0 + } + // no match starting here, try the next line + is VimMatchResult.Failure -> { + val nextLine = editor.offsetToBufferPosition(index).line + 1 + if (nextLine >= editor.lineCount()) break + editor.getLineStartOffset(nextLine) + } + } + } + // no match found after startIndex, try wrapping around to file start, if wrapscan is set + if (options.contains(VimRegexOptions.WRAP_SCAN)) { + index = 0 + while (index <= startIndex) { + val result = simulateNonExactNFA(editor, index, options) + // just return the first match found + when (result) { + is VimMatchResult.Success -> return result + is VimMatchResult.Failure -> { + val nextLine = editor.offsetToBufferPosition(index).line + 1 + if (nextLine >= editor.lineCount()) break + index = editor.getLineStartOffset(nextLine) + } + } + } + } + // entire editor was searched, but no match found + return VimMatchResult.Failure(VimRegexErrors.E486) + } + + /** + * Returns the first match of a pattern in the editor, that comes before the startIndex + * + * @param editor The editor where to look for the match in + * @param startIndex The index to 
start the find + * + * @return The first match found in the editor before startIndex + */ + public fun findPrevious( + editor: VimEditor, + startIndex: Int = 0, + options: List = emptyList() + ): VimMatchResult { + val startLine = editor.offsetToBufferPosition(startIndex).line + val result = findLastMatchInLine(editor, startLine, startIndex - 1, options) + if (result is VimMatchResult.Success && result.range.startOffset < startIndex) { + // there is a match at this line that starts before the startIndex + return result + } else { + // try searching in previous lines, line by line, and if necessary wrap around to the last line if wrapscan is set + var currentLine = startLine - 1 + var wrappedAround = false + while (!(wrappedAround && (currentLine < startLine || !options.contains(VimRegexOptions.WRAP_SCAN)))) { + if (currentLine < 0) { + currentLine = editor.lineCount() - 1 + wrappedAround = true + } else { + val previous = findLastMatchInLine(editor, currentLine, options=options) + if (previous is VimMatchResult.Success) return previous + else currentLine-- + } + } + // there are no matches in the entire file + return VimMatchResult.Failure(VimRegexErrors.E486) + } + } + + /** + * Finds the last match that starts at line, before maxIndex + * + * @param editor The editor where to look for the match in + * @param line The where the match should start + * @param maxIndex The maximum index (exclusive) where the match should start + * + * @return The last match found, if any + */ + private fun findLastMatchInLine( + editor: VimEditor, + line: Int, + maxIndex: Int = editor.getLineEndOffset(line), + options: List + ): VimMatchResult { + var index = editor.getLineStartOffset(line) + var prevResult: VimMatchResult = VimMatchResult.Failure(VimRegexErrors.E486) + val returnEndPosition = options.contains(VimRegexOptions.WANT_END_POSITION) + while (index <= maxIndex) { + val result = simulateNonExactNFA(editor, index, options) + when (result) { + // no more matches in this line, 
break out of the loop + is VimMatchResult.Failure -> break + is VimMatchResult.Success -> { + // no more relevant matches in this line, break out of the loop + if ((!returnEndPosition && result.range.startOffset > maxIndex) || (returnEndPosition && result.range.endOffset > maxIndex)) break + + // match found, try to find more after it + prevResult = result + index = if (result.range.startOffset == result.range.endOffset) result.range.endOffset + 1 else result.range.endOffset + } + } + } + // return the last found match in the line, if any + return prevResult + } + + /** + * Returns a sequence of all occurrences of a pattern within + * the editor, beginning at the specified index + * + * @param editor The editor where to look for the match in + * @param startIndex The index to start the find + * + * @return All the matches found in the editor + */ + public fun findAll( + editor: VimEditor, + startIndex: Int = 0, + maxIndex: Int = editor.text().length, + options: List = emptyList() + ): List { + var index = startIndex + val foundMatches: MutableList = emptyList().toMutableList() + while (index < maxIndex) { + val result = simulateNonExactNFA(editor, index, options) + when (result) { + /** + * A match was found, add it to foundMatches and increment + * next index accordingly + */ + is VimMatchResult.Success -> { + foundMatches.add(result) + index = if (result.range.startOffset == result.range.endOffset) result.range.endOffset + 1 + else result.range.endOffset + } + + /** + * No match found starting on this index, try searching on next line + */ + is VimMatchResult.Failure -> { + val nextLine = editor.offsetToBufferPosition(index).line + 1 + if (nextLine >= editor.lineCount()) break + index = editor.getLineStartOffset(nextLine) + } + } + } + return foundMatches + } + + /** + * Searches for a match of a pattern on a give line, starting at a certain column. 
+   *
+   * @param editor The editor where to look for the match in
+   * @param line The number of the line where to look for the match in
+   * @param column The column of that line where to start looking for a match
+   * @param options The matching options; they are consulted to decide whether case is ignored
+   */
+  public fun findInLine(
+    editor: VimEditor,
+    line: Int,
+    column: Int = 0,
+    options: List<VimRegexOptions> = emptyList()
+  ): VimMatchResult {
+    return simulateNonExactNFA(editor, editor.getLineStartOffset(line) + column, options)
+  }
+
+  /**
+   * "Simulates" the substitution of the match of a pattern with a substitution string.
+   *
+   * Substitution is not actually performed here, only simulated, since it may still be pending
+   * confirmation from the user.
+   *
+   * @param editor The editor where to look for the match and perform the substitution in
+   * @param substituteString The string used for substitution. Can either be taken literally or contain characters with a special meaning
+   * @param lastSubstituteString The substitution string lastly used.
+   * @param line The line to simulate the substitution in
+   * @param column The column of that line where to start looking for a match
+   * @param takeLiterally Whether to always take the string literally, meaning no character has a special meaning
+   * @param options The matching options, forwarded to [findInLine]
+   *
+   * @return The successful match paired with the text that should replace it, or null if nothing matched
+   */
+  public fun substitute(
+    editor: VimEditor,
+    substituteString: String,
+    lastSubstituteString: String,
+    line: Int,
+    column: Int = 0,
+    takeLiterally: Boolean = false,
+    options: List<VimRegexOptions> = emptyList()
+  ): Pair<VimMatchResult.Success, String>? {
+    return when (val match = findInLine(editor, line, column, options)) {
+      is VimMatchResult.Failure -> null
+      is VimMatchResult.Success -> {
+        val replacement =
+          if (takeLiterally) substituteString
+          else buildSubstituteString(match, substituteString, lastSubstituteString)
+        Pair(match, replacement)
+      }
+    }
+  }
+
+  /**
+   * Expands the special items of a substitution string against a successful match.
+   *
+   * @param matchResult The match that is being replaced
+   * @param substituteString The substitution string to expand
+   * @param lastSubstituteString The previously used substitution string, inserted in place of `~`
+   * @param magic When true `&` and `~` are special and `\&`, `\~` are literal; when false it is the other way around
+   *
+   * @return The text that replaces the match
+   */
+  private fun buildSubstituteString(
+    matchResult: VimMatchResult.Success,
+    substituteString: String,
+    lastSubstituteString: String,
+    magic: Boolean = true
+  ): String {
+    val result = StringBuilder()
+    var caseSettings: SubstituteCase = SubstituteCase.DEFAULT
+
+    // Appends a single literal character, applying (and possibly consuming) the pending case modifier
+    fun appendLiteral(char: Char) {
+      val (converted, nextCase) = buildLiteralChar(char, caseSettings)
+      caseSettings = nextCase
+      result.append(converted)
+    }
+
+    // Appends expanded text (whole match, group, previous string) so that \u \U \l \L also affect it
+    fun appendExpansion(text: String) {
+      val (converted, nextCase) = buildLiteralString(text, caseSettings)
+      caseSettings = nextCase
+      result.append(converted)
+    }
+
+    var index = 0
+    while (index < substituteString.length) {
+      val current = substituteString[index]
+      if (current == '\\') {
+        index++
+        if (index >= substituteString.length) {
+          // a trailing backslash has nothing to escape and is kept as is
+          result.append('\\')
+          break
+        }
+        when (val escaped = substituteString[index]) {
+          '&' -> if (magic) result.append('&') else appendExpansion(matchResult.value)
+          '~' -> {
+            if (magic) result.append('~')
+            else appendExpansion(buildSubstituteString(matchResult, lastSubstituteString, "", false))
+          }
+          '0' -> appendExpansion(matchResult.value)
+          // TODO: check for illegal back references
+          // A group that did not take part in the match expands to nothing (and not to the text "null")
+          in '1'..'9' -> appendExpansion(matchResult.groups.get(escaped.digitToInt())?.value.orEmpty())
+          'u' -> caseSettings = SubstituteCase.UPPER
+          'U' -> caseSettings = SubstituteCase.UPPER_PERSISTENT
+          'l' -> caseSettings = SubstituteCase.LOWER
+          'L' -> caseSettings = SubstituteCase.LOWER_PERSISTENT
+          'e' -> caseSettings = SubstituteCase.DEFAULT
+          'E' -> caseSettings = SubstituteCase.DEFAULT
+          'r' -> result.append('\n')
+          'n' -> result.append('\u0000')
+          'b' -> result.append('\b')
+          't' -> result.append('\t')
+          '\\' -> result.append('\\')
+          else -> appendLiteral(escaped)
+        }
+      } else if (current == '&' && magic) {
+        appendExpansion(matchResult.value)
+      } else if (current == '~' && magic) {
+        appendExpansion(buildSubstituteString(matchResult, lastSubstituteString, "", true))
+      } else {
+        appendLiteral(current)
+      }
+      index++
+    }
+
+    return result.toString()
+  }
+
+  /**
+   * Applies the pending case modifier to a single character.
+   *
+   * @return The converted character and the case modifier that is pending afterwards
+   */
+  private fun buildLiteralChar(
+    char: Char,
+    caseSettings: SubstituteCase
+  ): Pair<Char, SubstituteCase> {
+    return when (caseSettings) {
+      SubstituteCase.DEFAULT -> Pair(char, caseSettings)
+      SubstituteCase.UPPER -> Pair(char.uppercaseChar(), SubstituteCase.DEFAULT)
+      SubstituteCase.UPPER_PERSISTENT -> Pair(char.uppercaseChar(), caseSettings)
+      SubstituteCase.LOWER -> Pair(char.lowercaseChar(), SubstituteCase.DEFAULT)
+      SubstituteCase.LOWER_PERSISTENT -> Pair(char.lowercaseChar(), caseSettings)
+    }
+  }
+
+  /**
+   * Applies the pending case modifier to a whole piece of expanded text.
+   * One-shot modifiers only touch the first character; empty text leaves the modifier pending.
+   *
+   * @return The converted text and the case modifier that is pending afterwards
+   */
+  private fun buildLiteralString(
+    text: String,
+    caseSettings: SubstituteCase
+  ): Pair<String, SubstituteCase> {
+    if (text.isEmpty()) return Pair(text, caseSettings)
+    return when (caseSettings) {
+      SubstituteCase.DEFAULT -> Pair(text, caseSettings)
+      SubstituteCase.UPPER -> Pair(text.replaceFirstChar { it.uppercaseChar() }, SubstituteCase.DEFAULT)
+      SubstituteCase.UPPER_PERSISTENT -> Pair(text.uppercase(), caseSettings)
+      SubstituteCase.LOWER -> Pair(text.replaceFirstChar { it.lowercaseChar() }, SubstituteCase.DEFAULT)
+      SubstituteCase.LOWER_PERSISTENT -> Pair(text.lowercase(), caseSettings)
+    }
+  }
+
+  /**
+   * The case modifier that is pending while a substitution string is expanded.
+   * The non-persistent variants fall back to [DEFAULT] once they have been applied.
+   */
+  private enum class SubstituteCase {
+    DEFAULT,
+    UPPER,
+    UPPER_PERSISTENT,
+    LOWER,
+    LOWER_PERSISTENT,
+  }
+
+  /**
+   * Attempts to match a pattern exactly at the specified
+   * index in the editor text.
+   *
+   * @param editor The editor where to look for the match in
+   * @param index The index to start the match
+   * @param options The matching options; they are consulted to decide whether case is ignored
+   *
+   * @return The match, either successful or not, found at the specified index
+   */
+  public fun matchAt(
+    editor: VimEditor,
+    index: Int,
+    options: List<VimRegexOptions> = emptyList()
+  ): VimMatchResult {
+    return simulateNFA(editor, index, options)
+  }
+
+  /**
+   * Attempts to match the entire editor against the pattern.
+   *
+   * @param editor The editor where to look for the match in
+   * @param options The matching options; they are consulted to decide whether case is ignored
+   *
+   * @return The match, either successful or not, when matching against the entire editor
+   */
+  public fun matchEntire(
+    editor: VimEditor,
+    options: List<VimRegexOptions> = emptyList()
+  ): VimMatchResult {
+    // the simulation starts at offset 0, so only the end of the match has to be verified
+    return when (val result = simulateNFA(editor, options = options)) {
+      is VimMatchResult.Failure -> result
+      is VimMatchResult.Success -> {
+        if (result.range.endOffset == editor.text().length) result
+        else VimMatchResult.Failure(VimRegexErrors.E486) // create a more appropriate error code?
+      }
+    }
+  }
+
+  /**
+   * Indicates whether the pattern matches the entire editor.
+   *
+   * @param editor The editor where to look for the match in
+   * @param options The matching options; they are consulted to decide whether case is ignored
+   *
+   * @return True if the entire editor matches, false otherwise
+   */
+  public fun matches(
+    editor: VimEditor,
+    options: List<VimRegexOptions> = emptyList()
+  ): Boolean {
+    return matchEntire(editor, options) is VimMatchResult.Success
+  }
+
+  /**
+   * Checks if a pattern matches a part of the editor
+   * starting exactly at the specified index.
+   *
+   * @param editor The editor where to look for the match in
+   * @param index The index where the match has to start
+   * @param options The matching options; they are consulted to decide whether case is ignored
+   *
+   * @return True if there is a successful match starting at the specified index, false otherwise
+   */
+  public fun matchesAt(
+    editor: VimEditor,
+    index: Int,
+    options: List<VimRegexOptions> = emptyList()
+  ): Boolean {
+    return simulateNFA(editor, index, options) is VimMatchResult.Success
+  }
+
+  /**
+   * Simulates the internal NFA with the determined flags,
+   * started on a given index.
+   *
+   * @param editor The editor that is used for the simulation
+   * @param index The index where the simulation should start
+   * @param options The matching options; they are consulted to decide whether case is ignored
+   *
+   * @return The resulting match result
+   */
+  private fun simulateNFA(editor: VimEditor, index: Int = 0, options: List<VimRegexOptions>): VimMatchResult {
+    return VimRegexEngine.simulate(nfa, editor, index, shouldIgnoreCase(options))
+  }
+
+  /**
+   * Simulates the internal non-exact NFA with the determined flags,
+   * started on a given index.
+   *
+   * @param editor The editor that is used for the simulation
+   * @param index The index where the simulation should start
+   * @param options The matching options; they are consulted to decide whether case is ignored
+   *
+   * @return The resulting match result
+   */
+  private fun simulateNonExactNFA(editor: VimEditor, index: Int = 0, options: List<VimRegexOptions>): VimMatchResult {
+    return VimRegexEngine.simulate(nonExactNFA, editor, index, shouldIgnoreCase(options))
+  }
+
+  /**
+   * Determines, based on information that comes from the parser and other
+   * options that may be set, whether to ignore case.
+   *
+   * An explicit setting coming from the pattern always wins. Otherwise case is ignored when
+   * IGNORE_CASE is set, unless SMART_CASE is set as well and the pattern has upper case characters.
+   */
+  private fun shouldIgnoreCase(options: List<VimRegexOptions>): Boolean {
+    return when (caseSensitivitySettings) {
+      CaseSensitivitySettings.NO_IGNORE_CASE -> false
+      CaseSensitivitySettings.IGNORE_CASE -> true
+      CaseSensitivitySettings.DEFAULT -> {
+        val smartCaseApplies = options.contains(VimRegexOptions.SMART_CASE) && hasUpperCase
+        options.contains(VimRegexOptions.IGNORE_CASE) && !smartCaseApplies
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/VimRegexErrors.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/VimRegexErrors.kt
new file mode 100644
index 0000000000..df52cfd9a6
--- /dev/null
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/VimRegexErrors.kt
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2003-2023 The IdeaVim authors
+ *
+ * Use of this source code is governed by an MIT-style
+ * license that can be found in the LICENSE.txt file or at
+ * https://opensource.org/licenses/MIT.
+ */
+
+package com.maddyhome.idea.vim.regexp
+
+/**
+ * Error codes that can be reported while working with Vim regular expressions.
+ * The constant names are the Vim error numbers.
+ */
+public enum class VimRegexErrors {
+  /**
+   * Invalid search string
+   */
+  E383,
+
+  /**
+   * Pattern not found
+   */
+  E486,
+}
\ No newline at end of file
diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/VimRegexException.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/VimRegexException.kt
new file mode 100644
index 0000000000..4dc52387d1
--- /dev/null
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/VimRegexException.kt
@@ -0,0 +1,11 @@
+/*
+ * Copyright 2003-2023 The IdeaVim authors
+ *
+ * Use of this source code is governed by an MIT-style
+ * license that can be found in the LICENSE.txt file or at
+ * https://opensource.org/licenses/MIT.
+ */
+
+package com.maddyhome.idea.vim.regexp
+
+/**
+ * Unchecked exception raised by the regular expression code.
+ *
+ * @param message The description of what went wrong
+ */
+public class VimRegexException(override val message: String) : RuntimeException(message)
\ No newline at end of file
diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/VimRegexOptions.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/VimRegexOptions.kt
new file mode 100644
index 0000000000..6f961bc6c8
--- /dev/null
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/VimRegexOptions.kt
@@ -0,0 +1,16 @@
+/*
+ * Copyright 2003-2023 The IdeaVim authors
+ *
+ * Use of this source code is governed by an MIT-style
+ * license that can be found in the LICENSE.txt file or at
+ * https://opensource.org/licenses/MIT.
+ */
+
+package com.maddyhome.idea.vim.regexp
+
+/**
+ * Options that callers can pass along with a match request.
+ */
+public enum class VimRegexOptions {
+  IGNORE_CASE,
+  SMART_CASE,
+  WRAP_SCAN,
+  WANT_END_POSITION,
+}
\ No newline at end of file
diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/VimRegexEngine.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/VimRegexEngine.kt
new file mode 100644
index 0000000000..a61bf4ce04
--- /dev/null
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/VimRegexEngine.kt
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2003-2023 The IdeaVim authors
+ *
+ * Use of this source code is governed by an MIT-style
+ * license that can be found in the LICENSE.txt file or at
+ * https://opensource.org/licenses/MIT.
+ */
+
+package com.maddyhome.idea.vim.regexp.engine
+
+import com.maddyhome.idea.vim.api.VimEditor
+import com.maddyhome.idea.vim.regexp.VimRegexErrors
+import com.maddyhome.idea.vim.regexp.engine.nfa.NFA
+import com.maddyhome.idea.vim.regexp.engine.strategies.BacktrackingStrategy
+import com.maddyhome.idea.vim.regexp.engine.strategies.ImplicitDFAStrategy
+import com.maddyhome.idea.vim.regexp.engine.strategies.SimulationResult
+import com.maddyhome.idea.vim.regexp.engine.strategies.SimulationStrategy
+import com.maddyhome.idea.vim.regexp.match.VimMatchResult
+
+/**
+ * A meta-engine for simulating a nfa. It combines strategies that can be used to simulate the nfa,
+ * some more powerful but slower, others less powerful but faster. The engine combines these strategies,
+ * in order to always use the strategy that is less powerful (and thus faster), but powerful enough to
+ * simulate the nfa.
+ * This is a singleton.
+ */
+internal object VimRegexEngine {
+
+  // TODO: optimize by adding more strategies. The strategies should go from less powerful but faster, to more powerful but slower
+  /**
+   * The list of strategies that the engine has available. They should be ordered from less powerful to more powerful.
+   */
+  private val strategies: List<SimulationStrategy> = listOf(BacktrackingStrategy())
+
+  /**
+   * Simulate the nfa using the available strategies. The approach used is very simple: start with the least powerful
+   * strategy; if this strategy is powerful enough to determine if there is a match, return that match. If it isn't
+   * powerful enough, use the next (more powerful) strategy.
+   *
+   * @param nfa The nfa to simulate
+   * @param editor The editor whose text is matched against
+   * @param startIndex The index where the simulation starts
+   * @param isCaseInsensitive Whether case is ignored during the simulation
+   *
+   * @return The result of the first strategy that completes, or a "pattern not found" failure if none does
+   */
+  internal fun simulate(nfa: NFA, editor: VimEditor, startIndex: Int = 0, isCaseInsensitive: Boolean = false): VimMatchResult {
+    for (strategy in strategies) {
+      val result = strategy.simulate(nfa, editor, startIndex, isCaseInsensitive)
+      if (result is SimulationResult.Complete) return result.matchResult
+    }
+    return VimMatchResult.Failure(VimRegexErrors.E486)
+  }
+}
\ No newline at end of file
diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/NFA.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/NFA.kt
new file mode 100644
index 0000000000..8d3c3d88c0
--- /dev/null
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/NFA.kt
@@ -0,0 +1,221 @@
+/*
+ * Copyright 2003-2023 The IdeaVim authors
+ *
+ * Use of this source code is governed by an MIT-style
+ * license that can be found in the LICENSE.txt file or at
+ * https://opensource.org/licenses/MIT.
+ */
+
+package com.maddyhome.idea.vim.regexp.engine.nfa
+
+import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.EpsilonMatcher
+import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.Matcher
+
+/**
+ * Represents a non-deterministic finite automaton.
+ */
+internal class NFA private constructor(
+  /**
+   * The start state of the NFA
+   */
+  internal var startState: NFAState,
+  /**
+   * The end state of the NFA
+   */
+  internal var acceptState: NFAState
+) {
+
+  /**
+   * Concatenates the NFA with another NFA. The new NFA accepts inputs
+   * that are accepted by the old NFA followed by the other.
+   *
+   * @param other The NFA to concatenate with
+   *
+   * @return The new NFA representing the concatenation
+   */
+  internal fun concatenate(other: NFA): NFA {
+    // bridge the current accept state to the start of the other automaton
+    val bridge = NFATransition(EpsilonMatcher(), other.startState)
+    acceptState.addTransition(bridge)
+    acceptState = other.acceptState
+
+    return this
+  }
+
+  /**
+   * Unifies the NFA with another NFA. The new NFA accepts inputs
+   * that are accepted by either the old NFA or the other.
+   *
+   * This instance is modified in place; the branch of this NFA is tried before the branch of the other.
+   *
+   * @param other The NFA to unify with
+   *
+   * @return The new NFA representing the union
+   */
+  internal fun unify(other: NFA): NFA {
+    val fork = NFAState()
+    val join = NFAState()
+
+    // transition order is priority order: own branch first, then the other one
+    for (branchStart in listOf(startState, other.startState)) {
+      fork.addTransition(NFATransition(EpsilonMatcher(), branchStart))
+    }
+    for (branchAccept in listOf(acceptState, other.acceptState)) {
+      branchAccept.addTransition(NFATransition(EpsilonMatcher(), join))
+    }
+
+    startState = fork
+    acceptState = join
+
+    return this
+  }
+
+  /**
+   * Kleene's closure of the NFA. Allows the NFA to "loop" any amount of times.
+   *
+   * @param isGreedy Whether the NFA should give priority to consuming as much input as possible
+   *
+   * @return The new NFA representing the closure
+   */
+  internal fun closure(isGreedy: Boolean): NFA {
+    val entry = NFAState()
+    val exit = NFAState()
+
+    // Transition order is priority order: a greedy closure prefers going (back) into the loop,
+    // a lazy one prefers leaving it.
+    val targetsByPriority = if (isGreedy) listOf(startState, exit) else listOf(exit, startState)
+    for (target in targetsByPriority) {
+      entry.addTransition(NFATransition(EpsilonMatcher(), target))
+    }
+    for (target in targetsByPriority) {
+      acceptState.addTransition(NFATransition(EpsilonMatcher(), target))
+    }
+
+    startState = entry
+    acceptState = exit
+
+    return this
+  }
+
+  /**
+   * Gives the NFA the choice to jump directly from its start to
+   * accept state, without taking any of the inner transitions.
+   *
+   * @param isGreedy Whether the NFA should give priority to consuming as much input as possible
+   *
+   * @return The new NFA, that can be matched optionally
+   */
+  internal fun optional(isGreedy: Boolean): NFA {
+    val entry = NFAState()
+    val exit = NFAState()
+
+    // a greedy optional tries the inner automaton first, a lazy one tries to skip it first
+    val targetsByPriority = if (isGreedy) listOf(startState, exit) else listOf(exit, startState)
+    for (target in targetsByPriority) {
+      entry.addTransition(NFATransition(EpsilonMatcher(), target))
+    }
+
+    acceptState.addTransition(NFATransition(EpsilonMatcher(), exit))
+    startState = entry
+    acceptState = exit
+
+    return this
+  }
+
+  /**
+   * Marks the start and accept states of the NFA to start
+   * and end, respectively, the capturing of a group.
+   *
+   * @param groupNumber The number of the capture group
+   * @param force Whether the state should force-end the capturing of the group
+   */
+  internal fun capture(groupNumber: Int, force: Boolean = true) {
+    startState.startCapture.add(groupNumber)
+
+    val endMarkers = if (force) acceptState.forceEndCapture else acceptState.endCapture
+    endMarkers.add(groupNumber)
+  }
+
+  /**
+   * Marks the NFA to be asserted during simulation. The simulation
+   * may or may not consume input, and can be positive (simulation must
+   * succeed) or negative (simulation must fail).
+   *
+   * @param shouldConsume Whether the assertion should consume input.
+   * @param isPositive Whether the assertion is positive or negative.
+   * @param isAhead Forwarded as is to [NFAAssertion.isAhead].
+   * @param limit Forwarded as is to [NFAAssertion.limit].
+   *
+   * @return The NFA instance marked for assertion.
+   */
+  internal fun assert(shouldConsume: Boolean, isPositive: Boolean, isAhead: Boolean, limit: Int = 0): NFA {
+    val resumeState = NFAState()
+    val assertingState = NFAState()
+
+    // the current automaton becomes the body of the assertion
+    assertingState.assertion = NFAAssertion(
+      shouldConsume = shouldConsume,
+      isPositive = isPositive,
+      isAhead = isAhead,
+      startState = startState,
+      endState = acceptState,
+      jumpTo = resumeState,
+      limit = limit
+    )
+
+    startState = assertingState
+    acceptState = resumeState
+
+    return this
+  }
+
+  /**
+   * Sets the start state of the NFA to mark where the whole match should begin.
+   */
+  internal fun startMatch() {
+    startState.startCapture.add(0)
+  }
+
+  /**
+   * Sets the accept state of the NFA to mark where the whole match should end.
+   */
+  internal fun endMatch() {
+    acceptState.forceEndCapture.add(0)
+  }
+
+
+  internal companion object {
+
+    /**
+     * Creates a new instance of a NFA, that has a single
+     * state.
+     *
+     * @return The new NFA instance
+     */
+    internal fun fromSingleState(): NFA {
+      // the only state is both the start and the accept state
+      val onlyState = NFAState()
+      return NFA(onlyState, onlyState)
+    }
+
+    /**
+     * Creates a new instance of a NFA, that has two states
+     * with a transition from one state to the other
+     * defined by a matcher.
+     *
+     * start --matcher-> end
+     *
+     * @param matcher The matcher used for the transition
+     * @return The new NFA instance
+     */
+    internal fun fromMatcher(matcher: Matcher): NFA {
+      val from = NFAState()
+      val to = NFAState()
+      from.addTransition(NFATransition(matcher, to))
+
+      return NFA(from, to)
+    }
+  }
+}
diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/NFAAssertion.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/NFAAssertion.kt
new file mode 100644
index 0000000000..c0a10b2605
--- /dev/null
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/NFAAssertion.kt
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2003-2023 The IdeaVim authors
+ *
+ * Use of this source code is governed by an MIT-style
+ * license that can be found in the LICENSE.txt file or at
+ * https://opensource.org/licenses/MIT.
+ */
+
+package com.maddyhome.idea.vim.regexp.engine.nfa
+
+/**
+ * Represents an assertion.
+ *
+ * @param shouldConsume Whether the simulation should consume the input "consumed" by the assertion.
+ * @param isPositive True if the assertion is positive, false if negative.
+ * @param startState The state to jump to, to start the assertion
+ * @param endState The state where the assertion should end
+ * @param jumpTo The state that the simulation should jump to, to resume with normal
+ * simulation after the assertion.
+ * @param limit Only relevant for lookbehinds. Determines how far back to look for assertion
+ *
+ * @see :help /@=
+ * @see :help /@!
+ * @see :help /@<=
+ * @see :help /@<!
+ * @see :help /@>
+ */
+internal data class NFAAssertion(
+  val shouldConsume: Boolean,
+  val isPositive: Boolean,
+  val isAhead: Boolean,
+  val startState: NFAState,
+  val endState: NFAState,
+  val jumpTo: NFAState,
+  val limit: Int,
+)
\ No newline at end of file
diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/NFAState.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/NFAState.kt
new file mode 100644
index 0000000000..19611ca29d
--- /dev/null
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/NFAState.kt
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2003-2023 The IdeaVim authors
+ *
+ * Use of this source code is governed by an MIT-style
+ * license that can be found in the LICENSE.txt file or at
+ * https://opensource.org/licenses/MIT.
+ */
+
+package com.maddyhome.idea.vim.regexp.engine.nfa
+
+/**
+ * Represents a single state of a NFA.
+ */
+internal class NFAState {
+  /**
+   * All the transitions from this state. Order matters.
+   * Transitions with higher priority should be in lower
+   * indexes. This is relevant for the implementation of
+   * lazy quantifiers.
+   */
+  internal val transitions: MutableList<NFATransition> = mutableListOf()
+
+  /**
+   * When a state has an assertion, it has to be asserted
+   * in order to continue with the simulation.
+   */
+  internal var assertion: NFAAssertion? = null
+
+  /**
+   * Stores the numbers of the capture groups that start
+   * being captured on this state
+   */
+  internal val startCapture: MutableList<Int> = ArrayList()
+
+  /**
+   * Stores the number of the capture groups that stop
+   * being captured on this state
+   */
+  internal val endCapture: MutableList<Int> = ArrayList()
+
+  /**
+   * Stores the number of the capture groups that stop
+   * being captured on this state, even if that group
+   * had already been set to stop being captured
+   */
+  internal val forceEndCapture: MutableList<Int> = ArrayList()
+
+  // NOTE(review): not written anywhere in this part of the change; presumably set by the code
+  // that builds lazy multis - confirm and document
+  internal var hasLazyMulti: Boolean = false
+
+  /**
+   * Adds a new transition from this state. This transition
+   * has the lowest priority so far.
+   *
+   * @param transition The transition that is to be added
+   */
+  fun addTransition(transition: NFATransition) {
+    transitions.add(transition)
+  }
+}
\ No newline at end of file
diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/NFATransition.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/NFATransition.kt
new file mode 100644
index 0000000000..3a1ea8817b
--- /dev/null
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/NFATransition.kt
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2003-2023 The IdeaVim authors
+ *
+ * Use of this source code is governed by an MIT-style
+ * license that can be found in the LICENSE.txt file or at
+ * https://opensource.org/licenses/MIT.
+ */
+
+package com.maddyhome.idea.vim.regexp.engine.nfa
+
+import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.Matcher
+
+/**
+ * Represents a transition of the NFA.
+ *
+ * @param matcher The matcher that determines if the transition can
+ * be made, as well as information on how many characters
+ * are consumed by the transition.
+ *
+ * @param destState The destination state of the transition.
+ */
+internal data class NFATransition(
+  val matcher: Matcher,
+  val destState: NFAState,
+)
\ No newline at end of file
diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/BackreferenceMatcher.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/BackreferenceMatcher.kt
new file mode 100644
index 0000000000..1921722a54
--- /dev/null
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/BackreferenceMatcher.kt
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2003-2023 The IdeaVim authors
+ *
+ * Use of this source code is governed by an MIT-style
+ * license that can be found in the LICENSE.txt file or at
+ * https://opensource.org/licenses/MIT.
+ */
+
+package com.maddyhome.idea.vim.regexp.engine.nfa.matcher
+
+import com.maddyhome.idea.vim.api.VimCaret
+import com.maddyhome.idea.vim.api.VimEditor
+import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
+
+/**
+ * Matcher used to match against a previously captured group
+ *
+ * @param groupNumber The number of the back-referenced captured group
+ */
+internal class BackreferenceMatcher(private val groupNumber: Int) : Matcher {
+  /**
+   * Succeeds, consuming the length of the captured text, when the editor text at [index]
+   * repeats what the referenced group captured. Fails when the group captured nothing.
+   */
+  override fun matches(
+    editor: VimEditor,
+    index: Int,
+    groups: VimMatchGroupCollection,
+    isCaseInsensitive: Boolean,
+    possibleCursors: MutableList<VimCaret>
+  ): MatcherResult {
+    // TODO: throw illegal backreference error
+    val captured = groups.get(groupNumber)?.value ?: return MatcherResult.Failure
+
+    val text = editor.text()
+    if (text.length - index < captured.length) return MatcherResult.Failure
+
+    // Compare character by character: lower-casing whole strings can change their length,
+    // which would make the consumed length disagree with what was actually captured.
+    val repeatsCapture = captured.indices.all { offset ->
+      val expected = captured[offset]
+      val actual = text[index + offset]
+      if (isCaseInsensitive) expected.lowercaseChar() == actual.lowercaseChar() else expected == actual
+    }
+
+    return if (repeatsCapture) MatcherResult.Success(captured.length) else MatcherResult.Failure
+  }
+
+  override fun isEpsilon(): Boolean {
+    return false
+  }
+}
\ No newline at end of file
diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/CharacterMatcher.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/CharacterMatcher.kt
new file mode 100644
index 0000000000..e1093c1c05
--- /dev/null
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/CharacterMatcher.kt
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2003-2023 The IdeaVim authors
+ *
+ * Use of this source code is governed by an MIT-style
+ * license that can be found in the LICENSE.txt file or at
+ * https://opensource.org/licenses/MIT.
+ */
+
+package com.maddyhome.idea.vim.regexp.engine.nfa.matcher
+
+import com.maddyhome.idea.vim.api.VimCaret
+import com.maddyhome.idea.vim.api.VimEditor
+import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
+
+/**
+ * Matcher used to match against single characters
+ */
+internal class CharacterMatcher(val char: Char) : Matcher {
+  /**
+   * Succeeds, consuming one character, when the character at [index] equals [char]
+   * (both lower-cased first when case is ignored). Fails at or past the end of the text.
+   */
+  override fun matches(
+    editor: VimEditor,
+    index: Int,
+    groups: VimMatchGroupCollection,
+    isCaseInsensitive: Boolean,
+    possibleCursors: MutableList<VimCaret>
+  ): MatcherResult {
+    val text = editor.text()
+    if (index >= text.length) return MatcherResult.Failure
+
+    val targetChar = if (isCaseInsensitive) char.lowercaseChar() else char
+    val editorChar = if (isCaseInsensitive) text[index].lowercaseChar() else text[index]
+
+    return if (targetChar == editorChar) MatcherResult.Success(1) else MatcherResult.Failure
+  }
+
+  override fun isEpsilon(): Boolean {
+    return false
+  }
+}
\ No newline at end of file
diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/CollectionMatcher.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/CollectionMatcher.kt
new file mode 100644
index 0000000000..6882acb768
--- /dev/null
+++ 
b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/CollectionMatcher.kt
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2003-2023 The IdeaVim authors
+ *
+ * Use of this source code is governed by an MIT-style
+ * license that can be found in the LICENSE.txt file or at
+ * https://opensource.org/licenses/MIT.
+ */
+
+package com.maddyhome.idea.vim.regexp.engine.nfa.matcher
+
+import com.maddyhome.idea.vim.api.VimCaret
+import com.maddyhome.idea.vim.api.VimEditor
+import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
+
+/**
+ * Matcher used to match against a character collection.
+ *
+ * @param chars The individual characters in the collection
+ * @param ranges The ranges of characters in the collection
+ * @param charClasses Predicates; a character accepted by any of them counts as being in the collection
+ * @param isNegated Whether the Matcher should accept or refuse characters that are in the collection
+ * @param includesEOL Whether the collection includes the end-of-line
+ * @param forceNoIgnoreCase If this is set, matching is always case-sensitive
+ */
+internal class CollectionMatcher(
+  private val chars: Set<Char> = emptySet(),
+  private val ranges: List<CollectionRange> = emptyList(),
+  private val charClasses: List<(Char) -> Boolean> = emptyList(),
+  private val isNegated: Boolean = false,
+  private val includesEOL: Boolean = false,
+  private val forceNoIgnoreCase: Boolean = false
+) : Matcher {
+  /**
+   * Succeeds, consuming one character, when the character at [index] is accepted by the collection.
+   * Fails at or past the end of the text.
+   */
+  override fun matches(
+    editor: VimEditor,
+    index: Int,
+    groups: VimMatchGroupCollection,
+    isCaseInsensitive: Boolean,
+    possibleCursors: MutableList<VimCaret>
+  ): MatcherResult {
+    val text = editor.text()
+    if (index >= text.length) return MatcherResult.Failure
+
+    val char = text[index]
+    // a line break is decided by includesEOL alone; negation is not applied to it
+    if (char == '\n') return if (includesEOL) MatcherResult.Success(1) else MatcherResult.Failure
+
+    val isInCollection = if (isCaseInsensitive && !forceNoIgnoreCase) {
+      val lowerChar = char.lowercaseChar()
+      chars.any { it.lowercaseChar() == lowerChar } ||
+        ranges.any { it.inRange(char, true) } ||
+        charClasses.any { it(lowerChar) || it(char.uppercaseChar()) }
+    } else {
+      chars.contains(char) || ranges.any { it.inRange(char) } || charClasses.any { it(char) }
+    }
+
+    return if (isInCollection != isNegated) MatcherResult.Success(1) else MatcherResult.Failure
+  }
+
+  override fun isEpsilon(): Boolean {
+    return false
+  }
+}
+
+/**
+ * Represents a range of characters in a collection
+ *
+ * @param start The starting character of the range (inclusive)
+ * @param end The ending character of the range (inclusive)
+ */
+internal data class CollectionRange(val start: Char, val end: Char) {
+
+  /**
+   * Determines whether a character is inside the range
+   *
+   * When case is ignored the bounds are left untouched and the character itself, its lower case form
+   * and its upper case form are all tried. Everything that is in range when case matters is therefore
+   * also in range when it does not.
+   *
+   * @param char The character to verify
+   * @param isCaseInsensitive Whether case should be ignored
+   *
+   * @return whether char is inside the range
+   */
+  internal fun inRange(char: Char, isCaseInsensitive: Boolean = false): Boolean {
+    if (char in start..end) return true
+    return isCaseInsensitive && (char.lowercaseChar() in start..end || char.uppercaseChar() in start..end)
+  }
+}
\ No newline at end of file
diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/ColumnMatchers.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/ColumnMatchers.kt
new file mode 100644
index 0000000000..d602b9131b
--- /dev/null
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/ColumnMatchers.kt
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2003-2023 The IdeaVim authors
+ *
+ * Use of this source code is governed by an MIT-style
+ * license that can be found in the LICENSE.txt file or at
+ * https://opensource.org/licenses/MIT.
+ */
+
+package com.maddyhome.idea.vim.regexp.engine.nfa.matcher
+
+import com.maddyhome.idea.vim.api.VimCaret
+import com.maddyhome.idea.vim.api.VimEditor
+import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
+
+/**
+ * Matches, without consuming input, when the index is at the given (1-based) column.
+ */
+internal class AtColumnMatcher(private val columnNumber: Int) : Matcher {
+  override fun matches(
+    editor: VimEditor,
+    index: Int,
+    groups: VimMatchGroupCollection,
+    isCaseInsensitive: Boolean,
+    possibleCursors: MutableList<VimCaret>
+  ): MatcherResult {
+    return if (editor.offsetToBufferPosition(index).column + 1 == columnNumber) MatcherResult.Success(0)
+    else MatcherResult.Failure
+  }
+
+  override fun isEpsilon(): Boolean {
+    return true
+  }
+}
+
+/**
+ * Matches, without consuming input, when the index is before the given (1-based) column.
+ */
+internal class BeforeColumnMatcher(private val columnNumber: Int) : Matcher {
+  override fun matches(
+    editor: VimEditor,
+    index: Int,
+    groups: VimMatchGroupCollection,
+    isCaseInsensitive: Boolean,
+    possibleCursors: MutableList<VimCaret>
+  ): MatcherResult {
+    return if (editor.offsetToBufferPosition(index).column + 1 < columnNumber) MatcherResult.Success(0)
+    else MatcherResult.Failure
+  }
+
+  override fun isEpsilon(): Boolean {
+    return true
+  }
+}
+
+/**
+ * Matches, without consuming input, when the index is after the given (1-based) column.
+ */
+internal class AfterColumnMatcher(private val columnNumber: Int) : Matcher {
+  override fun matches(
+    editor: VimEditor,
+    index: Int,
+    groups: VimMatchGroupCollection,
+    isCaseInsensitive: Boolean,
+    possibleCursors: MutableList<VimCaret>
+  ): MatcherResult {
+    return if (editor.offsetToBufferPosition(index).column + 1 > columnNumber) MatcherResult.Success(0)
+    else MatcherResult.Failure
+  }
+
+  override fun isEpsilon(): Boolean {
+    return true
+  }
+}
+
+/**
+ * Keeps only the cursors accepted by [keep]. When at least one cursor is accepted the list is narrowed
+ * down to the accepted ones and the match succeeds without consuming input; otherwise the list is left
+ * untouched and the match fails.
+ */
+private inline fun narrowCursors(possibleCursors: MutableList<VimCaret>, keep: (VimCaret) -> Boolean): MatcherResult {
+  val remaining = possibleCursors.filter(keep)
+  if (remaining.isEmpty()) return MatcherResult.Failure
+
+  possibleCursors.clear()
+  possibleCursors.addAll(remaining)
+  return MatcherResult.Success(0)
+}
+
+/**
+ * Matches, without consuming input, when the index is at the same column as one of the possible cursors.
+ */
+internal class AtColumnCursorMatcher : Matcher {
+  override fun matches(
+    editor: VimEditor,
+    index: Int,
+    groups: VimMatchGroupCollection,
+    isCaseInsensitive: Boolean,
+    possibleCursors: MutableList<VimCaret>
+  ): MatcherResult {
+    val column = editor.offsetToBufferPosition(index).column
+    return narrowCursors(possibleCursors) { column == editor.offsetToBufferPosition(it.offset.point).column }
+  }
+
+  override fun isEpsilon(): Boolean {
+    return true
+  }
+}
+
+/**
+ * Matches, without consuming input, when the index is at a column before the column of one of the possible cursors.
+ */
+internal class BeforeColumnCursorMatcher : Matcher {
+  override fun matches(
+    editor: VimEditor,
+    index: Int,
+    groups: VimMatchGroupCollection,
+    isCaseInsensitive: Boolean,
+    possibleCursors: MutableList<VimCaret>
+  ): MatcherResult {
+    val column = editor.offsetToBufferPosition(index).column
+    return narrowCursors(possibleCursors) { column < editor.offsetToBufferPosition(it.offset.point).column }
+  }
+
+  override fun isEpsilon(): Boolean {
+    return true
+  }
+}
+
+/**
+ * Matches, without consuming input, when the index is at a column after the column of one of the possible cursors.
+ */
+internal class AfterColumnCursorMatcher : Matcher {
+  override fun matches(
+    editor: VimEditor,
+    index: Int,
+    groups: VimMatchGroupCollection,
+    isCaseInsensitive: Boolean,
+    possibleCursors: MutableList<VimCaret>
+  ): MatcherResult {
+    val column = editor.offsetToBufferPosition(index).column
+    return narrowCursors(possibleCursors) { column > editor.offsetToBufferPosition(it.offset.point).column }
+  }
+
+  override fun isEpsilon(): Boolean {
+    return true
+  }
+}
diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/CursorMatcher.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/CursorMatcher.kt
new file mode 100644
index 0000000000..ae32201e81
--- 
/dev/null
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/CursorMatcher.kt
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2003-2023 The IdeaVim authors
+ *
+ * Use of this source code is governed by an MIT-style
+ * license that can be found in the LICENSE.txt file or at
+ * https://opensource.org/licenses/MIT.
+ */
+
+package com.maddyhome.idea.vim.regexp.engine.nfa.matcher
+
+import com.maddyhome.idea.vim.api.VimCaret
+import com.maddyhome.idea.vim.api.VimEditor
+import com.maddyhome.idea.vim.common.Offset
+import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
+
+/**
+ * Matcher that matches if there is a cursor
+ * on the given index
+ */
+internal class CursorMatcher : Matcher {
+  /**
+   * Succeeds, without consuming input, when one of the possible cursors is at [index]; the list of
+   * possible cursors is then narrowed down to those cursors. On failure the list is left untouched.
+   */
+  override fun matches(
+    editor: VimEditor,
+    index: Int,
+    groups: VimMatchGroupCollection,
+    isCaseInsensitive: Boolean,
+    possibleCursors: MutableList<VimCaret>
+  ): MatcherResult {
+    val cursorsAtIndex = possibleCursors.filter { it.offset.point == index }
+    if (cursorsAtIndex.isEmpty()) return MatcherResult.Failure
+
+    // now the only cursors possible are the ones at this index
+    possibleCursors.clear()
+    possibleCursors.addAll(cursorsAtIndex)
+    return MatcherResult.Success(0)
+  }
+
+  override fun isEpsilon(): Boolean {
+    return true
+  }
+}
\ No newline at end of file
diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/DotMatcher.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/DotMatcher.kt
new file mode 100644
index 0000000000..cc4c41c5a8
--- /dev/null
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/DotMatcher.kt
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2003-2023 The IdeaVim authors
+ *
+ * Use of this source code is governed by an MIT-style
+ * license that can be found in the LICENSE.txt file or at
+ * https://opensource.org/licenses/MIT.
+ */ + +package com.maddyhome.idea.vim.regexp.engine.nfa.matcher + +import com.maddyhome.idea.vim.api.VimCaret +import com.maddyhome.idea.vim.api.VimEditor +import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection + +/** + * Matcher that matches with any character + */ +internal class DotMatcher(private val includeNewLine: Boolean) : Matcher { + override fun matches( + editor: VimEditor, + index: Int, + groups: VimMatchGroupCollection, + isCaseInsensitive: Boolean, + possibleCursors: MutableList + ): MatcherResult { + return if (includeNewLine) + if (index < editor.text().length) MatcherResult.Success(1) + else MatcherResult.Failure + else + if (index < editor.text().length && editor.text()[index] != '\n') MatcherResult.Success(1) + else MatcherResult.Failure + } + + override fun isEpsilon(): Boolean { + return false + } +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/EndOfFileMatcher.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/EndOfFileMatcher.kt new file mode 100644 index 0000000000..e4ca9db0f2 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/EndOfFileMatcher.kt @@ -0,0 +1,33 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. 
+ */ + +package com.maddyhome.idea.vim.regexp.engine.nfa.matcher + +import com.maddyhome.idea.vim.api.VimCaret +import com.maddyhome.idea.vim.api.VimEditor +import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection + +/** + * Matcher used to check if index is at the + * end of a file + */ +internal class EndOfFileMatcher : Matcher { + override fun matches( + editor: VimEditor, + index: Int, + groups: VimMatchGroupCollection, + isCaseInsensitive: Boolean, + possibleCursors: MutableList + ): MatcherResult { + return if (index == editor.text().length) MatcherResult.Success(0) else MatcherResult.Failure + } + + override fun isEpsilon(): Boolean { + return true + } +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/EndOfLineMatcher.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/EndOfLineMatcher.kt new file mode 100644 index 0000000000..21ca80d0c6 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/EndOfLineMatcher.kt @@ -0,0 +1,33 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.regexp.engine.nfa.matcher + +import com.maddyhome.idea.vim.api.VimCaret +import com.maddyhome.idea.vim.api.VimEditor +import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection + +/** + * Matcher used to check if index is at the end of a line. 
+ */ +internal class EndOfLineMatcher : Matcher { + override fun matches( + editor: VimEditor, + index: Int, + groups: VimMatchGroupCollection, + isCaseInsensitive: Boolean, + possibleCursors: MutableList + ): MatcherResult { + return if (index == editor.text().length || editor.text()[index] == '\n') MatcherResult.Success(0) + else MatcherResult.Failure + } + + override fun isEpsilon(): Boolean { + return true + } +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/EndOfWordMatcher.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/EndOfWordMatcher.kt new file mode 100644 index 0000000000..57ebd835c4 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/EndOfWordMatcher.kt @@ -0,0 +1,38 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.regexp.engine.nfa.matcher + +import com.maddyhome.idea.vim.api.VimCaret +import com.maddyhome.idea.vim.api.VimEditor +import com.maddyhome.idea.vim.options.helpers.KeywordOptionHelper +import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection + +/** + * Matcher used to check if index is at the end of a word. 
+ */ +internal class EndOfWordMatcher : Matcher { + override fun matches( + editor: VimEditor, + index: Int, + groups: VimMatchGroupCollection, + isCaseInsensitive: Boolean, + possibleCursors: MutableList + ): MatcherResult { + if (index > editor.text().length || index == 0) return MatcherResult.Failure + + val isKeywordAtIndex = editor.text().getOrNull(index)?.let { KeywordOptionHelper.isKeyword(editor, it) } ?: false + val isKeywordBeforeIndex = editor.text().getOrNull(index - 1)?.let { KeywordOptionHelper.isKeyword(editor, it) } ?: false + + return if (isKeywordBeforeIndex && !isKeywordAtIndex) MatcherResult.Success(0) else MatcherResult.Failure + } + + override fun isEpsilon(): Boolean { + return true + } +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/EpsilonMatcher.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/EpsilonMatcher.kt new file mode 100644 index 0000000000..83ccd77ee1 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/EpsilonMatcher.kt @@ -0,0 +1,34 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.regexp.engine.nfa.matcher + +import com.maddyhome.idea.vim.api.VimCaret +import com.maddyhome.idea.vim.api.VimEditor +import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection + +/** + * Matcher that always matches. It is used to represent + * epsilon transitions. This transitions can always be + * taken and without consuming any character. 
+ */ +internal class EpsilonMatcher : Matcher { + override fun matches( + editor: VimEditor, + index: Int, groups: + VimMatchGroupCollection, + isCaseInsensitive: Boolean, + possibleCursors: MutableList + ): MatcherResult { + return MatcherResult.Success(0) + } + + override fun isEpsilon(): Boolean { + return true + } +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/LineMatchers.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/LineMatchers.kt new file mode 100644 index 0000000000..a0f77154c6 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/LineMatchers.kt @@ -0,0 +1,129 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.regexp.engine.nfa.matcher + +import com.maddyhome.idea.vim.api.VimCaret +import com.maddyhome.idea.vim.api.VimEditor +import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection + +internal class AtLineMatcher(private val lineNumber: Int) : Matcher { + override fun matches( + editor: VimEditor, + index: Int, + groups: VimMatchGroupCollection, + isCaseInsensitive: Boolean, + possibleCursors: MutableList + ): MatcherResult { + return if (editor.offsetToBufferPosition(index).line + 1 == lineNumber) MatcherResult.Success(0) + else MatcherResult.Failure + } + + override fun isEpsilon(): Boolean { + return true + } +} + +internal class BeforeLineMatcher(private val lineNumber: Int) : Matcher { + override fun matches( + editor: VimEditor, + index: Int, + groups: VimMatchGroupCollection, + isCaseInsensitive: Boolean, + possibleCursors: MutableList + ): MatcherResult { + return if (editor.offsetToBufferPosition(index).line + 1 < lineNumber) MatcherResult.Success(0) + else MatcherResult.Failure + } + + 
override fun isEpsilon(): Boolean { + return true + } +} + +internal class AfterLineMatcher(private val lineNumber: Int) : Matcher { + override fun matches( + editor: VimEditor, + index: Int, + groups: VimMatchGroupCollection, + isCaseInsensitive: Boolean, + possibleCursors: MutableList + ): MatcherResult { + return if (editor.offsetToBufferPosition(index).line + 1 > lineNumber) MatcherResult.Success(0) + else MatcherResult.Failure + } + + override fun isEpsilon(): Boolean { + return true + } +} +internal class AtLineCursorMatcher : Matcher { + override fun matches( + editor: VimEditor, + index: Int, + groups: VimMatchGroupCollection, + isCaseInsensitive: Boolean, + possibleCursors: MutableList + ): MatcherResult { + return if (possibleCursors.any { editor.offsetToBufferPosition(index).line == editor.offsetToBufferPosition(it.offset.point).line }) { + val newPossibleCursors = possibleCursors.filter { editor.offsetToBufferPosition(index).line == editor.offsetToBufferPosition(it.offset.point).line } + possibleCursors.clear() + possibleCursors.addAll(newPossibleCursors) + MatcherResult.Success(0) + } + else MatcherResult.Failure + } + + override fun isEpsilon(): Boolean { + return true + } +} + +internal class BeforeLineCursorMatcher : Matcher { + override fun matches( + editor: VimEditor, + index: Int, + groups: VimMatchGroupCollection, + isCaseInsensitive: Boolean, + possibleCursors: MutableList + ): MatcherResult { + return if (possibleCursors.any { editor.offsetToBufferPosition(index).line < editor.offsetToBufferPosition(it.offset.point).line }) { + val newPossibleCursors = possibleCursors.filter { editor.offsetToBufferPosition(index).line < editor.offsetToBufferPosition(it.offset.point).line } + possibleCursors.clear() + possibleCursors.addAll(newPossibleCursors) + MatcherResult.Success(0) + } + else MatcherResult.Failure + } + + override fun isEpsilon(): Boolean { + return true + } +} + +internal class AfterLineCursorMatcher : Matcher { + override fun matches( + 
editor: VimEditor, + index: Int, + groups: VimMatchGroupCollection, + isCaseInsensitive: Boolean, + possibleCursors: MutableList + ): MatcherResult { + return if (possibleCursors.any { editor.offsetToBufferPosition(index).line > editor.offsetToBufferPosition(it.offset.point).line }) { + val newPossibleCursors = possibleCursors.filter { editor.offsetToBufferPosition(index).line > editor.offsetToBufferPosition(it.offset.point).line } + possibleCursors.clear() + possibleCursors.addAll(newPossibleCursors) + MatcherResult.Success(0) + } + else MatcherResult.Failure + } + + override fun isEpsilon(): Boolean { + return true + } +} diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/MarkMatchers.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/MarkMatchers.kt new file mode 100644 index 0000000000..cbc35d1c74 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/MarkMatchers.kt @@ -0,0 +1,40 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. 
+ */ + +package com.maddyhome.idea.vim.regexp.engine.nfa.matcher + +import com.maddyhome.idea.vim.api.VimCaret +import com.maddyhome.idea.vim.api.VimEditor +import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection + +internal abstract class BaseMarkMatcher(val mark: Char) : Matcher { + override fun matches(editor: VimEditor, index: Int, groups: VimMatchGroupCollection, isCaseInsensitive: Boolean, possibleCursors: MutableList): MatcherResult { + val newPossibleCursors = possibleCursors.filter { matchesCondition(index, it) } + return if (newPossibleCursors.isNotEmpty()) { + possibleCursors.clear() + possibleCursors.addAll(newPossibleCursors) + MatcherResult.Success(0) + } else MatcherResult.Failure + } + override fun isEpsilon(): Boolean = true + + abstract fun matchesCondition(index: Int, caret: VimCaret): Boolean + protected fun getMarkOffset(caret: VimCaret): Int? = caret.markStorage.getMark(mark)?.offset(caret.editor) +} + +internal class AtMarkMatcher(mark: Char) : BaseMarkMatcher(mark) { + override fun matchesCondition(index: Int, caret: VimCaret): Boolean = index == getMarkOffset(caret) +} + +internal class BeforeMarkMatcher(mark: Char) : BaseMarkMatcher(mark) { + override fun matchesCondition(index: Int, caret: VimCaret): Boolean = getMarkOffset(caret)?.let { index < it } ?: false +} + +internal class AfterMarkMatcher(mark: Char) : BaseMarkMatcher(mark) { + override fun matchesCondition(index: Int, caret: VimCaret): Boolean = getMarkOffset(caret)?.let { index > it } ?: false +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/Matcher.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/Matcher.kt new file mode 100644 index 0000000000..8c49a4fcdc --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/Matcher.kt @@ -0,0 +1,44 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is 
governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.regexp.engine.nfa.matcher + +import com.maddyhome.idea.vim.api.VimCaret +import com.maddyhome.idea.vim.api.VimEditor +import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection + +/** + * A matcher is used to decide if a transition can be taken, + * depending on what character is next in the input as well + * as other information contained in the editor or in the NFA + */ +internal interface Matcher { + /** + * Determines whether the matcher should match. + * + * @param editor The editor in its current state + * @param index The current index in the text of the editor + * @param groups The groups captured so far + * @param isCaseInsensitive Whether the matcher should ignore case + * @param possibleCursors The cursors that are allowed to match + * + * @return A result indicating either a failure to match, or success with the number of consumed characters + */ + fun matches( + editor: VimEditor, + index : Int, + groups: VimMatchGroupCollection, + isCaseInsensitive: Boolean, + possibleCursors: MutableList + ): MatcherResult + + /** + * Returns true if this matcher never consumes any input. + */ + fun isEpsilon(): Boolean +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/MatcherResult.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/MatcherResult.kt new file mode 100644 index 0000000000..e406d4cfe8 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/MatcherResult.kt @@ -0,0 +1,26 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. 
+ */ + +package com.maddyhome.idea.vim.regexp.engine.nfa.matcher + +/** + * Indicates the result of attempting to match with a Matcher + */ +internal sealed class MatcherResult { + /** + * Indicates that the Matcher successfully matched. + * + * @param consumed The number of characters consumed + */ + data class Success(val consumed: Int) : MatcherResult() + + /** + * Indicates that the Matcher doesn't match + */ + object Failure : MatcherResult() +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/PredicateMatcher.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/PredicateMatcher.kt new file mode 100644 index 0000000000..d4378f8641 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/PredicateMatcher.kt @@ -0,0 +1,57 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. 
+ */ + +package com.maddyhome.idea.vim.regexp.engine.nfa.matcher + +import com.maddyhome.idea.vim.api.VimCaret +import com.maddyhome.idea.vim.api.VimEditor +import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection + +/** + * Matcher used to match a character against a predicate + * + * @param predicate The predicate used to check if the character should be accepted + */ +internal class PredicateMatcher(val predicate: (Char) -> Boolean) : Matcher { + override fun matches( + editor: VimEditor, + index: Int, groups: + VimMatchGroupCollection, + isCaseInsensitive: Boolean, + possibleCursors: MutableList + ): MatcherResult { + return if (index < editor.text().length && predicate(editor.text()[index])) MatcherResult.Success(1) + else MatcherResult.Failure + } + + override fun isEpsilon(): Boolean { + return false + } +} + +/** + * Matcher used to match a character against a predicate + * + * @param predicate The predicate used to check if the character should be accepted + */ +internal class EditorAwarePredicateMatcher(val predicate: (VimEditor, Char) -> Boolean) : Matcher { + override fun matches( + editor: VimEditor, + index: Int, groups: + VimMatchGroupCollection, + isCaseInsensitive: Boolean, + possibleCursors: MutableList + ): MatcherResult { + return if (index < editor.text().length && predicate(editor, editor.text()[index])) MatcherResult.Success(1) + else MatcherResult.Failure + } + + override fun isEpsilon(): Boolean { + return false + } +} diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/StartOfFileMatcher.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/StartOfFileMatcher.kt new file mode 100644 index 0000000000..87d8471cf1 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/StartOfFileMatcher.kt @@ -0,0 +1,32 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that 
can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.regexp.engine.nfa.matcher + +import com.maddyhome.idea.vim.api.VimCaret +import com.maddyhome.idea.vim.api.VimEditor +import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection + +/** + * Matcher used to check if index is at the start of a file. + */ +internal class StartOfFileMatcher : Matcher{ + override fun matches( + editor: VimEditor, + index: Int, + groups: VimMatchGroupCollection, + isCaseInsensitive: Boolean, + possibleCursors: MutableList + ): MatcherResult { + return if (index == 0) MatcherResult.Success(0) else MatcherResult.Failure + } + + override fun isEpsilon(): Boolean { + return true + } +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/StartOfLineMatcher.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/StartOfLineMatcher.kt new file mode 100644 index 0000000000..c222398b72 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/StartOfLineMatcher.kt @@ -0,0 +1,33 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.regexp.engine.nfa.matcher + +import com.maddyhome.idea.vim.api.VimCaret +import com.maddyhome.idea.vim.api.VimEditor +import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection + +/** + * Matcher used to check if index is at the start of a line. 
+ */ +internal class StartOfLineMatcher : Matcher { + override fun matches( + editor: VimEditor, + index: Int, + groups: VimMatchGroupCollection, + isCaseInsensitive: Boolean, + possibleCursors: MutableList + ): MatcherResult { + return if (index == 0 || editor.text()[index - 1] == '\n') MatcherResult.Success(0) + else MatcherResult.Failure + } + + override fun isEpsilon(): Boolean { + return true + } +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/StartOfWordMatcher.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/StartOfWordMatcher.kt new file mode 100644 index 0000000000..b348ba30c5 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/StartOfWordMatcher.kt @@ -0,0 +1,38 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.regexp.engine.nfa.matcher + +import com.maddyhome.idea.vim.api.VimCaret +import com.maddyhome.idea.vim.api.VimEditor +import com.maddyhome.idea.vim.options.helpers.KeywordOptionHelper +import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection + +/** + * Matcher used to check if index is at the start of a word. 
+ */ +internal class StartOfWordMatcher : Matcher { + override fun matches( + editor: VimEditor, + index: Int, + groups: VimMatchGroupCollection, + isCaseInsensitive: Boolean, + possibleCursors: MutableList + ): MatcherResult { + if (index >= editor.text().length) return MatcherResult.Failure + + val isKeywordAtIndex = KeywordOptionHelper.isKeyword(editor, editor.text()[index]) + val isKeywordBeforeIndex = editor.text().getOrNull(index - 1)?.let { KeywordOptionHelper.isKeyword(editor, it) } ?: false + + return if (!isKeywordBeforeIndex && isKeywordAtIndex) MatcherResult.Success(0) else MatcherResult.Failure + } + + override fun isEpsilon(): Boolean { + return true + } +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/VisualAreaMatcher.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/VisualAreaMatcher.kt new file mode 100644 index 0000000000..c99ee55f30 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/nfa/matcher/VisualAreaMatcher.kt @@ -0,0 +1,53 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.regexp.engine.nfa.matcher + +import com.maddyhome.idea.vim.api.VimEditor +import com.maddyhome.idea.vim.api.VimCaret +import com.maddyhome.idea.vim.api.injector +import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection +import com.maddyhome.idea.vim.state.mode.Mode +import com.maddyhome.idea.vim.state.mode.inVisualMode + +/** + * Matcher used to check if index is inside the visual area. 
+ */ +internal class VisualAreaMatcher : Matcher { + override fun matches( + editor: VimEditor, + index: Int, groups: + VimMatchGroupCollection, + isCaseInsensitive: Boolean, + possibleCursors: MutableList + ): MatcherResult { + val processGroup = injector.processGroup + val newPossibleCursors = if (editor.inVisualMode) { + possibleCursors.filter { it.hasSelection() && index >= it.selectionStart && index < it.selectionEnd } + } + // IdeaVim exits visual mode before command processing (e.g. substitute), so we work with lastSelectionInfo + else if ((processGroup.isCommandProcessing || injector.vimscriptExecutor.executingVimscript) + && processGroup.modeBeforeCommandProcessing is Mode.VISUAL) { + possibleCursors.filter { it.lastSelectionInfo.isSelected(index, editor) } + } else { + emptyList() + } + + return if (newPossibleCursors.isNotEmpty()) { + possibleCursors.clear() + possibleCursors.addAll(newPossibleCursors) + MatcherResult.Success(0) + } else { + MatcherResult.Failure + } + } + + override fun isEpsilon(): Boolean { + return true + } +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/strategies/BacktrackingStrategy.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/strategies/BacktrackingStrategy.kt new file mode 100644 index 0000000000..766ad4da40 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/strategies/BacktrackingStrategy.kt @@ -0,0 +1,226 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. 
+ */ + +package com.maddyhome.idea.vim.regexp.engine.strategies + +import com.maddyhome.idea.vim.api.VimCaret +import com.maddyhome.idea.vim.api.VimEditor +import com.maddyhome.idea.vim.regexp.VimRegexErrors +import com.maddyhome.idea.vim.regexp.engine.nfa.NFA +import com.maddyhome.idea.vim.regexp.engine.nfa.NFAAssertion +import com.maddyhome.idea.vim.regexp.engine.nfa.NFAState +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.MatcherResult +import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection +import com.maddyhome.idea.vim.regexp.match.VimMatchResult +import kotlin.math.max + +/** + * Uses a backtracking based strategy to simulate the nfa. This strategy is very powerful, since it + * can be used with any nfa, but comes at the cost of speed. + */ +internal class BacktrackingStrategy : SimulationStrategy { + + /** + * Memory used to store capture groups + */ + private val groups: VimMatchGroupCollection = VimMatchGroupCollection() + + override fun simulate(nfa: NFA, editor: VimEditor, startIndex: Int, isCaseInsensitive: Boolean): SimulationResult { + groups.clear() + if (simulate(editor, startIndex, nfa.startState, nfa.acceptState, isCaseInsensitive, editor.carets().toMutableList()).simulationResult) { + return SimulationResult.Complete( + groups.get(0)?.let { + VimMatchResult.Success( + it.range, + it.value, + groups + ) + } ?: run { VimMatchResult.Failure(VimRegexErrors.E486) } + ) + } + return SimulationResult.Complete(VimMatchResult.Failure(VimRegexErrors.E486)) + } + + /** + * Simulates the NFA in a depth-first search fashion. 
+ * + * @param editor The editor that is used for the simulation + * @param index The current index of the text in the simulation + * @param state The current NFA state in the simulation + * @param targetState The NFA state that needs to be found for a successful match + * @param isCaseInsensitive Whether the simulation should ignore case + * @param maxIndex The maximum index of the text that the simulation is allowed to go to + * @param possibleCursors The cursors that are allowed to match + * + * @return The result of the simulation. It tells whether it was successful, and at what index it stopped. + */ + private fun simulate( + editor: VimEditor, + index: Int, + state: NFAState, + targetState: NFAState, + isCaseInsensitive: Boolean, + possibleCursors: MutableList, + maxIndex: Int = editor.text().length + ): NFASimulationResult { + val stack = emptyList().toMutableList() + stack.add(SimulationStackFrame(index, state, emptySet())) + + while (stack.isNotEmpty()) { + val currFrame = stack.removeLast() + if (currFrame.currentIndex > maxIndex) continue + updateCaptureGroups(editor, currFrame.currentIndex, currFrame.currentState) + if (currFrame.currentState === targetState) return NFASimulationResult(true, currFrame.currentIndex) + currFrame.currentState.assertion?.let { + val assertionResult = handleAssertion(editor, currFrame.currentIndex, isCaseInsensitive, it, possibleCursors) + if (assertionResult.simulationResult) stack.add(SimulationStackFrame(assertionResult.index, currFrame.currentState.assertion!!.jumpTo, emptySet())) + } + + for (transition in currFrame.currentState.transitions.reversed()) { + val transitionMatcherResult = transition.matcher.matches(editor, currFrame.currentIndex, groups, isCaseInsensitive, possibleCursors) + if (transitionMatcherResult !is MatcherResult.Success) continue + val destState = transition.destState + if (transitionMatcherResult.consumed == 0 && currFrame.epsilonVisited.contains(destState)) continue + val nextIndex = 
currFrame.currentIndex + transitionMatcherResult.consumed + val epsilonVisitedCopy = if (transitionMatcherResult.consumed == 0 && !currFrame.epsilonVisited.contains(destState)) currFrame.epsilonVisited.plusElement(currFrame.currentState) else HashSet() + stack.add(SimulationStackFrame(nextIndex, destState, epsilonVisitedCopy)) + } + } + return NFASimulationResult(false, index) + } + + /** + * Handles a state of the NFA that has an assertion. Determines if the assertion + * was successful or not, and where the normal simulation should resume. + * + * @param editor The editor that is used for the simulation + * @param currentIndex The current index of the text in the simulation + * @param isCaseInsensitive Whether the simulation should ignore case + * @param assertion The assertion that is to be handled + * @param possibleCursors The cursors that are allowed to match + * + * @return The result of the assertion. It tells whether it was successful, and at what index it stopped. + */ + private fun handleAssertion( + editor: VimEditor, + currentIndex: Int, + isCaseInsensitive: Boolean, + assertion: NFAAssertion, + possibleCursors: MutableList + ): NFASimulationResult { + return if (assertion.isAhead) handleAheadAssertion(editor, currentIndex, isCaseInsensitive, assertion, possibleCursors) + else handleBehindAssertion(editor, currentIndex, isCaseInsensitive, assertion, possibleCursors) + } + + /** + * Handles a state of the NFA that has an assertion ahead. Determines if the assertion + * was successful or not, and where the normal simulation should resume. + * + * @param editor The editor that is used for the simulation + * @param currentIndex The current index of the text in the simulation + * @param isCaseInsensitive Whether the simulation should ignore case + * @param assertion The assertion that is to be handled + * @param possibleCursors The cursors that are allowed to match + * + * @return The result of the assertion. 
It tells whether it was successful, and at what index it stopped. + */ + private fun handleAheadAssertion( + editor: VimEditor, + currentIndex: Int, + isCaseInsensitive: Boolean, + assertion: NFAAssertion, + possibleCursors: MutableList + ): NFASimulationResult { + val assertionResult = simulate(editor, currentIndex, assertion.startState, assertion.endState, isCaseInsensitive, possibleCursors) + if (assertionResult.simulationResult != assertion.isPositive) { + return NFASimulationResult(false, currentIndex) + } + + /** + * If the assertion should consume input, the normal simulation resumes at the index where the + * assertion stopped, else it resumes at the index that the simulation was at before the assertion. + */ + val newIndex = if (assertion.shouldConsume) assertionResult.index else currentIndex + return NFASimulationResult(true, newIndex) + } + + /** + * Handles a state of the NFA that has an assertion behind. Determines if the assertion + * was successful or not, and where the normal simulation should resume. + * + * @param editor The editor that is used for the simulation + * @param currentIndex The current index of the text in the simulation + * @param isCaseInsensitive Whether the simulation should ignore case + * @param assertion The assertion that is to be handled + * @param possibleCursors The cursors that are allowed to match + * + * @return The result of the assertion. It tells whether it was successful, and at what index it stopped. 
+ */ + private fun handleBehindAssertion( + editor: VimEditor, + currentIndex: Int, + isCaseInsensitive: Boolean, + assertion: NFAAssertion, + possibleCursors: MutableList + ): NFASimulationResult { + var lookBehindStartIndex = currentIndex - 1 + val minIndex = if (assertion.limit == 0) 0 else max(0, currentIndex - assertion.limit) + var seenNewLine = false + while (lookBehindStartIndex >= minIndex && !(seenNewLine && editor.text()[lookBehindStartIndex] != '\n')) { + // the lookbehind is allowed to look back as far as to the start of the previous line + if (editor.text()[lookBehindStartIndex] == '\n') seenNewLine = true + + val result = simulate(editor, lookBehindStartIndex, assertion.startState, assertion.endState, isCaseInsensitive, possibleCursors, maxIndex = currentIndex) + // found a match that ends before the "currentIndex" + if (result.simulationResult && result.index == currentIndex) { + return if (assertion.isPositive) NFASimulationResult( + true, + currentIndex + ) + else NFASimulationResult(false, currentIndex) + } + lookBehindStartIndex-- + } + return if (assertion.isPositive) NFASimulationResult(false, currentIndex) + else NFASimulationResult(true, currentIndex) + } + + /** + * Updates the results of capture groups' matches + * + * @param editor The editor that is used for the simulation + * @param index The current index of the text in the simulation + * @param state The current state in the simulation + */ + private fun updateCaptureGroups(editor: VimEditor, index: Int, state: NFAState) { + for (groupNumber in state.startCapture) groups.setGroupStart(groupNumber, index) + for (groupNumber in state.endCapture) groups.setGroupEnd(groupNumber, index, editor.text()) + for (groupNumber in state.forceEndCapture) groups.setForceGroupEnd(groupNumber, index, editor.text()) + } +} + +/** + * Represents the result of simulating a NFA + */ +private data class NFASimulationResult( + /** + * Whether the simulation reached a target state successfully + */ + val 
simulationResult: Boolean, + + /** + * The index of the input editor text at which the simulation stopped + */ + val index: Int +) + +private data class SimulationStackFrame( + val currentIndex: Int, + val currentState: NFAState, + val epsilonVisited: Set +) \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/strategies/ImplicitDFAStrategy.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/strategies/ImplicitDFAStrategy.kt new file mode 100644 index 0000000000..a399f91e8f --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/strategies/ImplicitDFAStrategy.kt @@ -0,0 +1,76 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.regexp.engine.strategies + +import com.maddyhome.idea.vim.api.VimCaret +import com.maddyhome.idea.vim.api.VimEditor +import com.maddyhome.idea.vim.regexp.VimRegexErrors +import com.maddyhome.idea.vim.regexp.engine.nfa.NFA +import com.maddyhome.idea.vim.regexp.engine.nfa.NFAState +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.BackreferenceMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.MatcherResult +import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection +import com.maddyhome.idea.vim.regexp.match.VimMatchResult + +internal class ImplicitDFAStrategy : SimulationStrategy { + override fun simulate(nfa: NFA, editor: VimEditor, startIndex: Int, isCaseInsensitive: Boolean): SimulationResult { + val possibleCursors = editor.carets().toMutableList() + var currentStates = mutableListOf(nfa.startState) + val groups = VimMatchGroupCollection() + + for (index in startIndex..editor.text().length) { + val epsilonClosures = currentStates.flatMap { state -> epsilonClosure(editor, index, isCaseInsensitive, groups, possibleCursors, 
state) } + val nextStates = mutableListOf() + for (state in epsilonClosures) { + + // if there is anything that the algorithm can't deal with, we can't know for sure whether there is a match or not + if (state.assertion != null || + state.hasLazyMulti || + state.transitions.any { it.matcher is BackreferenceMatcher }) return SimulationResult.Incomplete + + nextStates.addAll(state.transitions.filter { + !it.matcher.isEpsilon() && + it.matcher.matches(editor, index, groups, isCaseInsensitive, possibleCursors) is MatcherResult.Success + }.map { it.destState }) + } + if (nextStates.isEmpty()) break + currentStates = nextStates + } + groups.get(0)?.let { + return SimulationResult.Complete(VimMatchResult.Success(it.range, it.value, groups)) + } + return SimulationResult.Complete(VimMatchResult.Failure(VimRegexErrors.E486)) + } + + private fun epsilonClosure(editor: VimEditor, index: Int, isCaseInsensitive: Boolean, groups: VimMatchGroupCollection, possibleCursors: MutableList, state: NFAState, visited: MutableSet = mutableSetOf()): List { + updateCaptureGroups(editor, index, state, groups) + if (!state.transitions.any { it.matcher.isEpsilon() }) return listOf(state) + + val result = mutableListOf() + for (transition in state.transitions.filter { it.matcher.isEpsilon() }) { + if (visited.contains(transition.destState) || transition.matcher.matches(editor, index, groups, isCaseInsensitive, possibleCursors) is MatcherResult.Failure) continue + visited.add(transition.destState) + result.addAll(epsilonClosure(editor, index, isCaseInsensitive, groups, possibleCursors, transition.destState, visited)) + } + return result + } + + /** + * Updates the results of capture groups' matches + * + * @param editor The editor that is used for the simulation + * @param index The current index of the text in the simulation + * @param state The current state in the simulation + */ + private fun updateCaptureGroups(editor: VimEditor, index: Int, state: NFAState, groups: VimMatchGroupCollection) 
{ + for (groupNumber in state.startCapture) groups.setGroupStart(groupNumber, index) + for (groupNumber in state.endCapture) groups.setGroupEnd(groupNumber, index, editor.text()) + for (groupNumber in state.forceEndCapture) groups.setForceGroupEnd(groupNumber, index, editor.text()) + } +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/strategies/SimulationResult.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/strategies/SimulationResult.kt new file mode 100644 index 0000000000..c268e3bbd0 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/strategies/SimulationResult.kt @@ -0,0 +1,28 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.regexp.engine.strategies + +import com.maddyhome.idea.vim.regexp.match.VimMatchResult + +/** + * The result of applying a SimulationStrategy. + */ +internal sealed class SimulationResult { + /** + * The simulation is deemed "complete" if it found a match, or if + * it can determine with absolute certainty there are no matches. + */ + data class Complete(val matchResult: VimMatchResult) : SimulationResult() + + /** + * The simulation is deemed "incomplete" if it just isn't powerful + * enough to determine whether there is match. 
+ */ + object Incomplete : SimulationResult() +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/strategies/SimulationStrategy.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/strategies/SimulationStrategy.kt new file mode 100644 index 0000000000..6058973628 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/engine/strategies/SimulationStrategy.kt @@ -0,0 +1,30 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.regexp.engine.strategies + +import com.maddyhome.idea.vim.api.VimEditor +import com.maddyhome.idea.vim.regexp.engine.nfa.NFA + +/** + * A strategy that can be used to simulate a NFA + */ +internal interface SimulationStrategy { + + /** + * Simulates a nfa using some strategy + * + * @param nfa The nfa to simulate + * @param editor The editor that is used for the simulation + * @param startIndex The index where the simulation should start + * @param isCaseInsensitive Whether the simulation should ignore case + * + * @return The resulting match result + */ + fun simulate(nfa: NFA, editor: VimEditor, startIndex: Int, isCaseInsensitive: Boolean): SimulationResult +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/match/VimMatchGroup.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/match/VimMatchGroup.kt new file mode 100644 index 0000000000..2051eec054 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/match/VimMatchGroup.kt @@ -0,0 +1,26 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. 
+ */ + +package com.maddyhome.idea.vim.regexp.match + +import com.maddyhome.idea.vim.common.TextRange + +/** + * The resulting match of a capture group + */ +public class VimMatchGroup( + /** + * The range of indices in the editor text of where the match was found + */ + public val range: TextRange, + + /** + * The string value of the match found + */ + public val value: String +) \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/match/VimMatchGroupCollection.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/match/VimMatchGroupCollection.kt new file mode 100644 index 0000000000..bc2b5f0c1c --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/match/VimMatchGroupCollection.kt @@ -0,0 +1,128 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.regexp.match + +import com.maddyhome.idea.vim.common.TextRange + +/** + * A collection of match results of capture groups + */ +public class VimMatchGroupCollection( + /** + * The maximum amount of capture groups. + * There can only be a maximum of 10 capture groups. + * Index 0 is for the entire match and the other 9 for explicit + * capture groups. 
+ */ + override val size: Int = 10, + +) : Collection { + /** + * Store the capture groups + */ + private val groups: MutableList = MutableList(size) { null } + + /** + * Store the start indexes of groups + */ + private val groupStarts: IntArray = IntArray(size) + + /** + * Used to check if a certain group capture has started and ended + */ + private val completedGroups: BooleanArray = BooleanArray(size) { false } + + private val forceEnded: BooleanArray = BooleanArray(size) { false } + + /** + * Store the highest seen group number plus one, which + * should correspond to the number of tracked groups + */ + private var groupCount: Int = 0 + + /** + * Gets a single capture group match + * + * @param index The number of the capture group to get + * + * @return The capture group with the desired number, or null if the number is too big + */ + public fun get(index: Int): VimMatchGroup? { + return if (index < groupCount && index < groups.size && index >= 0) groups[index] + else null + } + + /** + * Sets the start index of a certain capture group + * + * @param groupNumber The number of the capture group + * @param startIndex The index where the capture group match starts + */ + internal fun setGroupStart(groupNumber: Int, startIndex: Int) { + groupStarts[groupNumber] = startIndex + if (groupNumber == 0) completedGroups[groupNumber] = false + } + + /** + * Sets the end index of a certain capture group if it wasn't previously set + * + * @param groupNumber The number of the capture group + * @param endIndex The index where the capture group match end + * @param text The text used to extract the matched string + */ + internal fun setGroupEnd(groupNumber: Int, endIndex: Int, text: CharSequence) { + if (completedGroups[groupNumber] && forceEnded[groupNumber]) return + + val range = TextRange(groupStarts[groupNumber], endIndex) + groups[groupNumber] = VimMatchGroup(range, text.substring(range.startOffset, range.endOffset)) + groupCount = maxOf(groupCount, groupNumber + 1) + 
completedGroups[groupNumber] = true + } + + /** + * Sets the end index of a certain capture group + * + * @param groupNumber The number of the capture group + * @param endIndex The index where the capture group match end + * @param text The text used to extract the matched string + */ + internal fun setForceGroupEnd(groupNumber: Int, endIndex: Int, text: CharSequence) { + val range = TextRange(groupStarts[groupNumber], endIndex) + groups[groupNumber] = VimMatchGroup(range, text.substring(range.startOffset, range.endOffset)) + groupCount = maxOf(groupCount, groupNumber + 1) + completedGroups[groupNumber] = true + forceEnded[groupNumber] = true + } + + internal fun clear() { + groupCount = 0 + for (groupNumber in completedGroups.indices) completedGroups[groupNumber] = false + for (groupNumber in forceEnded.indices) forceEnded[groupNumber] = false + } + + override fun contains(element: VimMatchGroup): Boolean { + return groups.subList(0, groupCount).contains(element) + } + + override fun containsAll(elements: Collection): Boolean { + return groups.subList(0, groupCount).containsAll(elements) + } + + override fun isEmpty(): Boolean { + return groups.subList(0, groupCount).isEmpty() + } + + override fun iterator(): Iterator { + return groups.subList(0, groupCount) + .filterNotNull() + .iterator() + } + + +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/match/VimMatchResult.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/match/VimMatchResult.kt new file mode 100644 index 0000000000..fd18111c6f --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/match/VimMatchResult.kt @@ -0,0 +1,40 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. 
+ */ + +package com.maddyhome.idea.vim.regexp.match + +import com.maddyhome.idea.vim.common.TextRange +import com.maddyhome.idea.vim.regexp.VimRegexErrors + +/** + * The result of matching a pattern against an editor + */ +public sealed class VimMatchResult { + + /** + * Successful match + * + * @param range The range of indices in the editor text of where the match was found + * @param value The string value of the match found + * @param groups The results of sub-matches corresponding to capture groups + */ + public data class Success( + public val range: TextRange, + public val value: String, + public val groups: VimMatchGroupCollection + ) : VimMatchResult() + + /** + * Match was unsuccessful or not found + * + * @param errorCode Code of the error that caused matching to fail + */ + public data class Failure( + val errorCode: VimRegexErrors + ) : VimMatchResult() +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/VimRegexParser.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/VimRegexParser.kt new file mode 100644 index 0000000000..8eb47e2fde --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/VimRegexParser.kt @@ -0,0 +1,60 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. 
+ */ + +package com.maddyhome.idea.vim.regexp.parser + +import com.maddyhome.idea.vim.regexp.parser.error.BailErrorLexer +import com.maddyhome.idea.vim.regexp.parser.error.VimRegexParserErrorStrategy +import com.maddyhome.idea.vim.regexp.parser.error.VimRegexParserException +import com.maddyhome.idea.vim.regexp.parser.generated.RegexLexer +import com.maddyhome.idea.vim.regexp.parser.generated.RegexParser +import org.antlr.v4.runtime.CharStreams +import org.antlr.v4.runtime.CommonTokenStream + +/** + * Represents a parser of Vim's patterns. + * This is a singleton. + */ +internal object VimRegexParser { + + /** + * Tries to parse a given pattern + * + * @param pattern The Vim pattern that is to be parsed + * + * @return The result, either successful or not, of trying to parse the pattern + */ + fun parse(pattern: String) : VimRegexParserResult { + return try { + val regexLexer = BailErrorLexer(CharStreams.fromString(pattern)) + val tokens = CommonTokenStream(regexLexer) + val parser = RegexParser(tokens) + parser.errorHandler = VimRegexParserErrorStrategy() + parser.errorListeners.clear() + val tree = parser.pattern() + VimRegexParserResult.Success(tree, getCaseSensitivitySettings(regexLexer)) + } catch (e: VimRegexParserException) { + VimRegexParserResult.Failure(e.errorCode) + } + } + + /** + * Auxiliary function used to get the case sensitivity settings from the lexer. + * The lexer has an internal flag, ignoreCase, that is initially null; if it + * then comes across a \c, it sets this flag to true, and if it comes across a + * \C, sets it to false. 
+ */ + private fun getCaseSensitivitySettings(lexer: RegexLexer) : CaseSensitivitySettings { + return when (lexer.ignoreCase) { + // explicitly compare with true and false, since it might be null + true -> CaseSensitivitySettings.IGNORE_CASE + false -> CaseSensitivitySettings.NO_IGNORE_CASE + else -> CaseSensitivitySettings.DEFAULT + } + } +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/VimRegexParserResult.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/VimRegexParserResult.kt new file mode 100644 index 0000000000..f9e94050eb --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/VimRegexParserResult.kt @@ -0,0 +1,47 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.regexp.parser + +import com.maddyhome.idea.vim.regexp.VimRegexErrors +import org.antlr.v4.runtime.tree.ParseTree + +/** + * The result of trying to parse a string representing a Vim + * regular expression into a parse tree + */ +internal sealed class VimRegexParserResult { + + /** + * Represents a successful parse + * + * @param tree The parse tree of the parsed regular expression + * @param caseSensitivitySettings The value of the case sensitivity flag in the regular expression + */ + data class Success(val tree: ParseTree, val caseSensitivitySettings: CaseSensitivitySettings) : VimRegexParserResult() + + /** + * Represents an unsuccessful parse + * + * @param errorCode A message giving output on why parsing failed + */ + data class Failure(val errorCode: VimRegexErrors) : VimRegexParserResult() +} + +/** + * Represents the case sensitivity setting of a regular expression + * IGNORE_CASE is for \c, NO_IGNORE_CASE for \C, and DEFAULT when + * none of these tokens are present. 
+ * + * @see :help /ignorecase + */ +internal enum class CaseSensitivitySettings { + DEFAULT, + IGNORE_CASE, + NO_IGNORE_CASE +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/error/BailErrorLexer.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/error/BailErrorLexer.kt new file mode 100644 index 0000000000..626bfe04e1 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/error/BailErrorLexer.kt @@ -0,0 +1,20 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.regexp.parser.error + +import com.maddyhome.idea.vim.regexp.VimRegexErrors +import com.maddyhome.idea.vim.regexp.parser.generated.RegexLexer +import org.antlr.v4.runtime.CharStream +import org.antlr.v4.runtime.LexerNoViableAltException + +internal class BailErrorLexer(input: CharStream) : RegexLexer(input) { + override fun recover(e: LexerNoViableAltException) { + throw VimRegexParserException(VimRegexErrors.E383) + } +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/error/VimRegexParserErrorStrategy.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/error/VimRegexParserErrorStrategy.kt new file mode 100644 index 0000000000..a8df023eb0 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/error/VimRegexParserErrorStrategy.kt @@ -0,0 +1,28 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. 
+ */ + +package com.maddyhome.idea.vim.regexp.parser.error + +import com.maddyhome.idea.vim.regexp.VimRegexErrors +import org.antlr.v4.runtime.DefaultErrorStrategy +import org.antlr.v4.runtime.Parser +import org.antlr.v4.runtime.RecognitionException +import org.antlr.v4.runtime.Token + +internal class VimRegexParserErrorStrategy : DefaultErrorStrategy() { + + override fun recover(recognizer: Parser?, e: RecognitionException) { + throw VimRegexParserException(VimRegexErrors.E383) + } + + override fun recoverInline(recognizer: Parser?): Token { + throw VimRegexParserException(VimRegexErrors.E383) + } + + override fun sync(recognizer: Parser?) { } +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/error/VimRegexParserException.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/error/VimRegexParserException.kt new file mode 100644 index 0000000000..3a86445eed --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/error/VimRegexParserException.kt @@ -0,0 +1,15 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. 
+ */ + +package com.maddyhome.idea.vim.regexp.parser.error + +import com.maddyhome.idea.vim.regexp.VimRegexErrors + +internal data class VimRegexParserException( + val errorCode: VimRegexErrors +) : RuntimeException(errorCode.toString()) \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/visitors/CollectionElementVisitor.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/visitors/CollectionElementVisitor.kt new file mode 100644 index 0000000000..a1deb5bba7 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/visitors/CollectionElementVisitor.kt @@ -0,0 +1,161 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.regexp.parser.visitors + +import com.maddyhome.idea.vim.regexp.parser.generated.RegexParser +import com.maddyhome.idea.vim.regexp.parser.generated.RegexParserBaseVisitor + +/** + * A tree visitor for visiting nodes representing a collection. 
+ * + * @see :help /collection + */ +internal class CollectionElementVisitor : RegexParserBaseVisitor>() { + + override fun visitSingleColElem(ctx: RegexParser.SingleColElemContext): Pair { + val elem = cleanLiteralChar(ctx.text) + return Pair(CollectionElement.SingleCharacter(elem.first), elem.second) + } + + override fun visitRangeColElem(ctx: RegexParser.RangeColElemContext): Pair { + val rangeStart = cleanLiteralChar(ctx.start.text) + val rangeEnd = cleanLiteralChar(ctx.end.text) + val includesEOL = rangeStart.second || rangeEnd.second + return Pair(CollectionElement.CharacterRange(rangeStart.first, rangeEnd.first), includesEOL) + } + + override fun visitAlnumClass(ctx: RegexParser.AlnumClassContext?): Pair { + return Pair(CollectionElement.CharacterClassExpression{it.isLetterOrDigit()}, false) + } + + override fun visitAlphaClass(ctx: RegexParser.AlphaClassContext?): Pair { + return Pair(CollectionElement.CharacterClassExpression{it.isLetter()}, false) + } + + override fun visitBlankClass(ctx: RegexParser.BlankClassContext?): Pair { + return Pair(CollectionElement.CharacterClassExpression{" \t".contains(it)}, false) + } + + override fun visitCntrlClass(ctx: RegexParser.CntrlClassContext?): Pair { + return Pair(CollectionElement.CharacterClassExpression{it.isISOControl()}, false) + } + + override fun visitDigitClass(ctx: RegexParser.DigitClassContext?): Pair { + return Pair(CollectionElement.CharacterClassExpression{it.isDigit()}, false) + } + + override fun visitGraphClass(ctx: RegexParser.GraphClassContext?): Pair { + return Pair(CollectionElement.CharacterClassExpression{it in '!'..'~'}, false) + } + + override fun visitLowerClass(ctx: RegexParser.LowerClassContext?): Pair { + return Pair(CollectionElement.CharacterClassExpression{it.isLowerCase()}, false) + } + + override fun visitPrintClass(ctx: RegexParser.PrintClassContext?): Pair { + return Pair(CollectionElement.CharacterClassExpression{!it.isISOControl()}, false) + } + + override fun 
visitPunctClass(ctx: RegexParser.PunctClassContext?): Pair { + return Pair(CollectionElement.CharacterClassExpression{ + it in '!'..'/' || + it in ':'..'@' || + it in '['..'`' || + it in '{'..'~' + }, false) + } + + override fun visitSpaceClass(ctx: RegexParser.SpaceClassContext?): Pair { + return Pair(CollectionElement.CharacterClassExpression{it.isWhitespace()}, false) + } + + override fun visitUpperClass(ctx: RegexParser.UpperClassContext?): Pair { + return Pair(CollectionElement.CharacterClassExpression{it.isUpperCase()}, false) + } + + override fun visitXdigitClass(ctx: RegexParser.XdigitClassContext?): Pair { + return Pair(CollectionElement.CharacterClassExpression{ + it in '0'..'9' || + it in 'a'..'f' || + it in 'A'..'F' + }, false) + } + + override fun visitReturnClass(ctx: RegexParser.ReturnClassContext?): Pair { + return Pair(CollectionElement.CharacterClassExpression{it == '\r'}, false) + } + + override fun visitTab(ctx: RegexParser.TabContext?): Pair { + return Pair(CollectionElement.CharacterClassExpression{it == '\t'}, false) + } + + override fun visitEsc(ctx: RegexParser.EscContext?): Pair { + return Pair(CollectionElement.CharacterClassExpression{it == ''}, false) + } + + override fun visitBackspaceClass(ctx: RegexParser.BackspaceClassContext?): Pair { + return Pair(CollectionElement.CharacterClassExpression{it == '\b'}, false) + } + + override fun visitIdentClass(ctx: RegexParser.IdentClassContext?): Pair { + return Pair(CollectionElement.CharacterClassExpression{it.isJavaIdentifierPart()}, false) + } + + override fun visitKeywordClass(ctx: RegexParser.KeywordClassContext?): Pair { + return Pair(CollectionElement.CharacterClassExpression{it.isLetterOrDigit() || it == '_'}, false) + } + + override fun visitFnameClass(ctx: RegexParser.FnameClassContext?): Pair { + return Pair(CollectionElement.CharacterClassExpression{it.isLetter() || "_/.-+,#$%~=".contains(it)}, false) + } + + private fun cleanLiteralChar(str: String) : Pair { + return if 
(str.length > 2 && str[0] == '\\' && str[1] == 'd') Pair(Char(str.substring(2).toInt()), false) + else if (str.length > 2 && str[0] == '\\' && str[1] == 'o') Pair(Char(str.substring(2).toInt(8)), false) + else if (str.length > 2 && str[0] == '\\' && str[1] == 'x') Pair(Char(str.substring(2).toInt(16)), false) + else if (str.length > 2 && str[0] == '\\' && str[1] == 'u') Pair(Char(str.substring(2).toInt(16)), false) + else if (str.length > 2 && str[0] == '\\' && str[1] == 'U') Pair(Char(str.substring(2).toInt(16)), false) + else if (str.length == 2 && str[0] == '\\' && str[1] == 'e') Pair('', false) + else if (str.length == 2 && str[0] == '\\' && str[1] == 't') Pair('\t', false) + else if (str.length == 2 && str[0] == '\\' && str[1] == 'r') Pair('\r', false) + else if (str.length == 2 && str[0] == '\\' && str[1] == 'b') Pair('\b', false) + else if (str.length == 2 && str[0] == '\\' && str[1] == 'n') Pair('\n', true) + else if (str.length == 2 && str[0] == '\\') Pair(str[1], false) + else Pair(str[0], false) + } +} + +/** + * Represents a single element in a collection. This element can be + * a single character, a range of characters, or a character class + * expression. + */ +internal sealed class CollectionElement { + /** + * Represents a single character collection element. + * + * @param char The character element. + */ + data class SingleCharacter(val char: Char) : CollectionElement() + + /** + * Represents a range of characters collection element. + * + * @param start The starting character of the range. + * @param end The ending character of the range. + */ + data class CharacterRange(val start: Char, val end: Char) : CollectionElement() + + /** + * Represents a character class expression element. e.g. [:digit:]. + * + * @param predicate The condition that a character has to meet to belong in the character class. 
+ */ + data class CharacterClassExpression(val predicate: (Char) -> Boolean) : CollectionElement() +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/visitors/MultiVisitor.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/visitors/MultiVisitor.kt new file mode 100644 index 0000000000..7b4e8d3813 --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/visitors/MultiVisitor.kt @@ -0,0 +1,146 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.regexp.parser.visitors + +import com.maddyhome.idea.vim.regexp.parser.error.VimRegexParserException +import com.maddyhome.idea.vim.regexp.parser.generated.RegexParser +import com.maddyhome.idea.vim.regexp.parser.generated.RegexParserBaseVisitor +import org.antlr.v4.runtime.Token +import org.antlr.v4.runtime.tree.TerminalNode + +/** + * A tree visitor for visiting nodes representing a multi. It is used to identify + * what type of multi is being visited. 
+ * + * @see :help /multi + */ +internal class MultiVisitor : RegexParserBaseVisitor() { + + override fun visitZeroOrMore(ctx: RegexParser.ZeroOrMoreContext): Multi { + return Multi.RangeMulti(RangeBoundary.IntRangeBoundary(0), RangeBoundary.InfiniteRangeBoundary, true) + } + + override fun visitOneOrMore(ctx: RegexParser.OneOrMoreContext): Multi { + return Multi.RangeMulti(RangeBoundary.IntRangeBoundary(1), RangeBoundary.InfiniteRangeBoundary, true) + } + + override fun visitZeroOrOne(ctx: RegexParser.ZeroOrOneContext?): Multi { + return Multi.RangeMulti(RangeBoundary.IntRangeBoundary(0), RangeBoundary.IntRangeBoundary(1), true) + } + + override fun visitRangeGreedy(ctx: RegexParser.RangeGreedyContext): Multi { + return visitRange(ctx.lower_bound, ctx.upper_bound, ctx.COMMA(), true) + } + + override fun visitRangeLazy(ctx: RegexParser.RangeLazyContext): Multi { + return visitRange(ctx.lower_bound, ctx.upper_bound, ctx.COMMA(), false) + } + + private fun visitRange(lowerBoundToken: Token?, upperBoundToken: Token?, comma: TerminalNode?, isGreedy: Boolean): Multi { + val lowerDelimiter = if (lowerBoundToken == null) RangeBoundary.IntRangeBoundary(0) else RangeBoundary.IntRangeBoundary(lowerBoundToken.text.toInt()) + val upperDelimiter = if (comma != null) if (upperBoundToken == null) RangeBoundary.InfiniteRangeBoundary else RangeBoundary.IntRangeBoundary(upperBoundToken.text.toInt()) + else if (lowerBoundToken == null) RangeBoundary.InfiniteRangeBoundary else lowerDelimiter + return if (upperDelimiter is RangeBoundary.IntRangeBoundary && lowerDelimiter.i > upperDelimiter.i) Multi.RangeMulti(lowerDelimiter, upperDelimiter, isGreedy) + else Multi.RangeMulti(lowerDelimiter, upperDelimiter, isGreedy) + } + + override fun visitAtomic(ctx: RegexParser.AtomicContext?): Multi { + return Multi.AtomicMulti + } + + override fun visitPositiveLookahead(ctx: RegexParser.PositiveLookaheadContext?): Multi { + return Multi.AssertionMulti(isPositive = true, isAhead = true) + } + + 
override fun visitNegativeLookahead(ctx: RegexParser.NegativeLookaheadContext?): Multi { + return Multi.AssertionMulti(isPositive = false, isAhead = true) + } + + override fun visitPositiveLookbehind(ctx: RegexParser.PositiveLookbehindContext?): Multi { + return Multi.AssertionMulti(isPositive = true, isAhead = false) + } + + override fun visitNegativeLookbehind(ctx: RegexParser.NegativeLookbehindContext?): Multi { + return Multi.AssertionMulti(isPositive = false, isAhead = false) + } + + override fun visitPositiveLimitedLookbehind(ctx: RegexParser.PositiveLimitedLookbehindContext): Multi { + val limit = (Regex("\\d+").find(ctx.text))?.value?.toInt() ?: run { 0 } + return Multi.AssertionMulti(isPositive = true, isAhead = false, limit) + } + + override fun visitNegativeLimitedLookbehind(ctx: RegexParser.NegativeLimitedLookbehindContext): Multi { + val limit = (Regex("\\d+").find(ctx.text))?.value?.toInt() ?: run { 0 } + return Multi.AssertionMulti(isPositive = false, isAhead = false, limit) + } +} + +/** + * Represents a multi. + * + * @see :help multi + */ +internal sealed class Multi { + + /** + * Delimits the number of times that a multi should + * make a certain atom repeat itself + * + * @param lowerBoundary The minimum number of times that the atom can repeat itself. + * @param upperBoundary The maximum number of times that the atom can repeat itself. This number can be infinite. + * @param isGreedy Whether this multi is greedy. A greedy multi always consumes as much input + * it can, while a non-greedy, or lazy multi, consumes the least amount of input + * it can. + */ + internal data class RangeMulti( + val lowerBoundary: RangeBoundary.IntRangeBoundary, + val upperBoundary: RangeBoundary, + val isGreedy: Boolean + ) : Multi() + + /** + * Used to represent an atomic atom. Atoms that are atomic, match + * as if they were a whole pattern. + * + * @see :help /\@> + */ + object AtomicMulti : Multi() + + /** + * Used to represent an assertion multi. 
These + * are also known as look-ahead and look-behind. + * They can be positive, meaning that they must match, + * or negative, meaning that they must not match. + * + * @param isPositive Whether the assertion is positive + * @param isAhead Whether it is a look-ahead + */ + internal data class AssertionMulti( + val isPositive: Boolean, + val isAhead: Boolean, + val limit: Int = 0 + ) : Multi() +} + +/** + * Used to represent a boundary of a range multi + */ +internal sealed class RangeBoundary { + /** + * Represents an integer boundary + * + * @param i The boundary of the multi + */ + data class IntRangeBoundary(val i: Int) : RangeBoundary() + + /** + * Represents an infinite boundary + */ + object InfiniteRangeBoundary : RangeBoundary() +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/visitors/PatternVisitor.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/visitors/PatternVisitor.kt new file mode 100644 index 0000000000..acf505ddbf --- /dev/null +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/visitors/PatternVisitor.kt @@ -0,0 +1,744 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. 
+ */ + +package com.maddyhome.idea.vim.regexp.parser.visitors + +import com.maddyhome.idea.vim.api.VimEditor +import com.maddyhome.idea.vim.options.helpers.KeywordOptionHelper +import com.maddyhome.idea.vim.regexp.engine.nfa.NFA +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.AfterColumnCursorMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.AfterColumnMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.AfterLineCursorMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.AfterLineMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.AfterMarkMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.AtColumnCursorMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.AtColumnMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.AtLineCursorMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.AtLineMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.AtMarkMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.BackreferenceMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.BeforeColumnCursorMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.BeforeColumnMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.BeforeLineCursorMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.BeforeLineMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.BeforeMarkMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.CharacterMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.CollectionMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.CollectionRange +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.CursorMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.DotMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.EditorAwarePredicateMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.EndOfFileMatcher +import 
com.maddyhome.idea.vim.regexp.engine.nfa.matcher.EndOfLineMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.EndOfWordMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.PredicateMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.StartOfFileMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.StartOfLineMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.StartOfWordMatcher +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.VisualAreaMatcher +import com.maddyhome.idea.vim.regexp.parser.generated.RegexParser +import com.maddyhome.idea.vim.regexp.parser.generated.RegexParserBaseVisitor + +/** + * A tree visitor for converting a parsed Vim pattern into an internal + * NFA, that is then used to then find matches in an editor. + * This is a singleton. + */ +internal object PatternVisitor : RegexParserBaseVisitor() { + + /** + * Tracks the number of capture groups visited + */ + private var groupCount: Int = 0 + + /** + * Maps tree nodes representing capture groups to their respective group number + */ + private val groupNumbers: HashMap = HashMap() + + /** + * Determines whether the visited tree contains + */ + internal var hasUpperCase: Boolean = false + + override fun visitPattern(ctx: RegexParser.PatternContext): NFA { + hasUpperCase = false + groupCount = 0 + groupNumbers.clear() + groupCount++ + val subNfa = visit(ctx.sub_pattern()) + subNfa.capture(0, false) + return subNfa + } + + override fun visitSub_pattern(ctx: RegexParser.Sub_patternContext): NFA { + return ctx.branches.map { visitBranch(it) }.union() + } + + override fun visitBranch(ctx: RegexParser.BranchContext): NFA { + val nfaStart = if (ctx.CARET() != null) NFA.fromMatcher(StartOfLineMatcher()) else NFA.fromSingleState() + val nfaEnd = if (ctx.DOLLAR() != null) NFA.fromMatcher(EndOfLineMatcher()) else NFA.fromSingleState() + + for (concat in ctx.concats.dropLast(1)) { + val subNFA = visit(concat) + subNFA.assert(shouldConsume = 
false, isPositive = true, isAhead = true) + nfaStart.concatenate(subNFA) + } + return nfaStart.concatenate(if (ctx.concats.isNotEmpty()) visit(ctx.concats.last()) else NFA.fromSingleState()).concatenate(nfaEnd) + } + + override fun visitConcat(ctx: RegexParser.ConcatContext): NFA { + return ctx.pieces.map { visitPiece(it) }.concatenate() + } + + override fun visitPiece(ctx: RegexParser.PieceContext): NFA { + if (ctx.multi() == null) return visit(ctx.atom()) + + val multi = MultiVisitor().visit(ctx.multi()) + + return when (multi) { + is Multi.RangeMulti -> buildQuantifiedNFA(ctx.atom(), multi) + is Multi.AtomicMulti -> return visit(ctx.atom()).assert(shouldConsume = true, isPositive = true, isAhead = true) + is Multi.AssertionMulti -> return visit(ctx.atom()).assert(shouldConsume = false, isPositive = multi.isPositive, isAhead = multi.isAhead, limit = multi.limit) + } + } + + private fun buildQuantifiedNFA(atom: RegexParser.AtomContext, range: Multi.RangeMulti) : NFA { + val prefixNFA = NFA.fromSingleState() + for (i in 0 until range.lowerBoundary.i) + prefixNFA.concatenate(visit(atom)) + + var suffixNFA = NFA.fromSingleState() + if (range.upperBoundary is RangeBoundary.InfiniteRangeBoundary) suffixNFA = visit(atom).closure(range.isGreedy) + else { + for (i in range.lowerBoundary.i until (range.upperBoundary as RangeBoundary.IntRangeBoundary).i) { + suffixNFA.concatenate(visit(atom)) + suffixNFA.optional(range.isGreedy) + } + } + + prefixNFA.concatenate(suffixNFA) + if (atom is RegexParser.GroupingCaptureContext) + groupNumbers[atom]?.let { prefixNFA.capture(it, false) } + if (!range.isGreedy) prefixNFA.startState.hasLazyMulti = true + return prefixNFA + } + + override fun visitGroupingCapture(ctx: RegexParser.GroupingCaptureContext): NFA { + val groupNumber = groupNumbers[ctx] ?: groupCount.also { groupNumbers[ctx] = it; groupCount++ } + + val nfa = if (ctx.sub_pattern() == null) NFA.fromSingleState() else visit(ctx.sub_pattern()) + nfa.capture(groupNumber) + + 
return nfa + } + + override fun visitGroupingNoCapture(ctx: RegexParser.GroupingNoCaptureContext): NFA { + return if (ctx.sub_pattern() == null) NFA.fromSingleState() + else visit(ctx.sub_pattern()) + } + + override fun visitLiteralChar(ctx: RegexParser.LiteralCharContext): NFA { + return NFA.fromMatcher(CharacterMatcher(cleanLiteralChar(ctx.text))) + } + + override fun visitAnyChar(ctx: RegexParser.AnyCharContext?): NFA { + return NFA.fromMatcher(DotMatcher(false)) + } + + override fun visitAnyCharNL(ctx: RegexParser.AnyCharNLContext?): NFA { + return NFA.fromMatcher(DotMatcher(true)) + } + + override fun visitCursor(ctx: RegexParser.CursorContext?): NFA { + return NFA.fromMatcher(CursorMatcher()) + } + + override fun visitIdentifier(ctx: RegexParser.IdentifierContext): NFA { + val base = { char: Char -> char.isJavaIdentifierPart() } + return if (ctx.text.contains('_')) + NFA.fromMatcher( + PredicateMatcher { char -> char == '\n' || base(char) } + ) + else + NFA.fromMatcher( + PredicateMatcher { char -> base(char) } + ) + } + + override fun visitIdentifierNotDigit(ctx: RegexParser.IdentifierNotDigitContext): NFA { + val base = { char: Char -> !char.isDigit() && char.isJavaIdentifierPart() } + return if (ctx.text.contains('_')) + NFA.fromMatcher( + PredicateMatcher { char -> char == '\n' || base(char) } + ) + else + NFA.fromMatcher( + PredicateMatcher { char -> base(char) } + ) + } + + override fun visitKeyword(ctx: RegexParser.KeywordContext): NFA { + val base = { editor: VimEditor, char: Char -> KeywordOptionHelper.isKeyword(editor, char) } + return if (ctx.text.contains('_')) + NFA.fromMatcher(EditorAwarePredicateMatcher { editor, char -> char == '\n' || base(editor, char) }) + else + NFA.fromMatcher(EditorAwarePredicateMatcher { editor, char -> base(editor, char) }) + } + + override fun visitKeywordNotDigit(ctx: RegexParser.KeywordNotDigitContext): NFA { + val base = { editor: VimEditor, char: Char -> !char.isDigit() && KeywordOptionHelper.isKeyword(editor, 
char) } + return if (ctx.text.contains('_')) + NFA.fromMatcher(EditorAwarePredicateMatcher { editor, char -> char == '\n' || base(editor, char) }) + else + NFA.fromMatcher(EditorAwarePredicateMatcher { editor, char -> base(editor, char) }) + } + + override fun visitFilename(ctx: RegexParser.FilenameContext): NFA { + val base = { char: Char -> char.isLetterOrDigit() || "_/.-+,#$%~=".contains(char) } + return if (ctx.text.contains('_')) + NFA.fromMatcher( + PredicateMatcher { char -> char == '\n' || base(char) } + ) + else + NFA.fromMatcher( + PredicateMatcher { char -> base(char) } + ) + } + + override fun visitFilenameNotDigit(ctx: RegexParser.FilenameNotDigitContext): NFA { + val base = { char: Char -> char.isLetter() || "_/.-+,#$%~=".contains(char) } + return if (ctx.text.contains('_')) + NFA.fromMatcher( + PredicateMatcher { char -> char == '\n' || base(char) } + ) + else + NFA.fromMatcher( + PredicateMatcher { char -> base(char) } + ) + } + + override fun visitPrintable(ctx: RegexParser.PrintableContext): NFA { + val base = { char: Char -> !char.isISOControl() } + return if (ctx.text.contains('_')) + NFA.fromMatcher( + PredicateMatcher { char -> char == '\n' || base(char) } + ) + else + NFA.fromMatcher( + PredicateMatcher { char -> base(char) } + ) + } + + override fun visitPrintableNotDigit(ctx: RegexParser.PrintableNotDigitContext): NFA { + val base = { char: Char -> !char.isDigit() && !char.isISOControl() } + return if (ctx.text.contains('_')) + NFA.fromMatcher( + PredicateMatcher { char -> char == '\n' || base(char) } + ) + else + NFA.fromMatcher( + PredicateMatcher { char -> base(char) } + ) + } + + override fun visitWhitespace(ctx: RegexParser.WhitespaceContext): NFA { + return NFA.fromMatcher(CollectionMatcher( + setOf(' ', '\t'), + includesEOL = ctx.text.contains('_'), + forceNoIgnoreCase = true + )) + } + + override fun visitNotWhitespace(ctx: RegexParser.NotWhitespaceContext): NFA { + return NFA.fromMatcher(CollectionMatcher( + setOf(' ', '\t'), + 
isNegated = true, + includesEOL = ctx.text.contains('_'), + forceNoIgnoreCase = true + )) + } + + override fun visitDigit(ctx: RegexParser.DigitContext): NFA { + return NFA.fromMatcher( + CollectionMatcher( + ranges = listOf(CollectionRange('0', '9')), + includesEOL = ctx.text.contains('_'), + forceNoIgnoreCase = true + ) + ) + } + + override fun visitNotDigit(ctx: RegexParser.NotDigitContext): NFA { + return NFA.fromMatcher( + CollectionMatcher( + ranges = listOf(CollectionRange('0', '9')), + isNegated = true, + includesEOL = ctx.text.contains('_'), + forceNoIgnoreCase = true + ) + ) + } + + override fun visitHex(ctx: RegexParser.HexContext): NFA { + return NFA.fromMatcher( + CollectionMatcher( + ranges = listOf( + CollectionRange('0', '9'), + CollectionRange('A', 'F'), + CollectionRange('a', 'f'), + ), + includesEOL = ctx.text.contains('_'), + forceNoIgnoreCase = true + ) + ) + } + + override fun visitNotHex(ctx: RegexParser.NotHexContext): NFA { + return NFA.fromMatcher( + CollectionMatcher( + ranges = listOf( + CollectionRange('0', '9'), + CollectionRange('A', 'F'), + CollectionRange('a', 'f'), + ), + isNegated = true, + includesEOL = ctx.text.contains('_'), + forceNoIgnoreCase = true + ) + ) + } + + override fun visitOctal(ctx: RegexParser.OctalContext): NFA { + return NFA.fromMatcher( + CollectionMatcher( + ranges = listOf(CollectionRange('0', '7')), + includesEOL = ctx.text.contains('_'), + forceNoIgnoreCase = true + ) + ) + } + + override fun visitNotOctal(ctx: RegexParser.NotOctalContext): NFA { + return NFA.fromMatcher( + CollectionMatcher( + ranges = listOf(CollectionRange('0', '7')), + isNegated = true, + includesEOL = ctx.text.contains('_'), + forceNoIgnoreCase = true + ) + ) + } + + override fun visitWordchar(ctx: RegexParser.WordcharContext): NFA { + return NFA.fromMatcher( + CollectionMatcher( + chars = setOf('_'), + ranges = listOf( + CollectionRange('0', '9'), + CollectionRange('A', 'Z'), + CollectionRange('a', 'z'), + ), + includesEOL = 
ctx.text.contains('_'), + forceNoIgnoreCase = true + ) + ) + } + + override fun visitNotwordchar(ctx: RegexParser.NotwordcharContext): NFA { + return NFA.fromMatcher( + CollectionMatcher( + chars = setOf('_'), + ranges = listOf( + CollectionRange('0', '9'), + CollectionRange('A', 'Z'), + CollectionRange('a', 'z'), + ), + isNegated = true, + includesEOL = ctx.text.contains('_'), + forceNoIgnoreCase = true + ) + ) + } + + override fun visitHeadofword(ctx: RegexParser.HeadofwordContext): NFA { + return NFA.fromMatcher( + CollectionMatcher( + chars = setOf('_'), + ranges = listOf( + CollectionRange('A', 'Z'), + CollectionRange('a', 'z'), + ), + includesEOL = ctx.text.contains('_'), + forceNoIgnoreCase = true + ) + ) + } + + override fun visitNotHeadOfWord(ctx: RegexParser.NotHeadOfWordContext): NFA { + return NFA.fromMatcher( + CollectionMatcher( + chars = setOf('_'), + ranges = listOf( + CollectionRange('A', 'Z'), + CollectionRange('a', 'z'), + ), + isNegated = true, + includesEOL = ctx.text.contains('_'), + forceNoIgnoreCase = true + ) + ) + } + + override fun visitAlpha(ctx: RegexParser.AlphaContext): NFA { + return NFA.fromMatcher( + CollectionMatcher( + ranges = listOf( + CollectionRange('A', 'Z'), + CollectionRange('a', 'z'), + ), + includesEOL = ctx.text.contains('_'), + forceNoIgnoreCase = true + ) + ) + } + + override fun visitNotAlpha(ctx: RegexParser.NotAlphaContext): NFA { + return NFA.fromMatcher( + CollectionMatcher( + ranges = listOf( + CollectionRange('A', 'Z'), + CollectionRange('a', 'z'), + ), + isNegated = true, + includesEOL = ctx.text.contains('_'), + forceNoIgnoreCase = true + ) + ) + } + + override fun visitLcase(ctx: RegexParser.LcaseContext): NFA { + return NFA.fromMatcher( + CollectionMatcher( + ranges = listOf(CollectionRange('a', 'z')), + includesEOL = ctx.text.contains('_'), + forceNoIgnoreCase = true + ) + ) + } + + override fun visitNotLcase(ctx: RegexParser.NotLcaseContext): NFA { + return NFA.fromMatcher( + CollectionMatcher( + ranges = 
listOf(CollectionRange('a', 'z')), + isNegated = true, + includesEOL = ctx.text.contains('_'), + forceNoIgnoreCase = true + ) + ) + } + + override fun visitUcase(ctx: RegexParser.UcaseContext): NFA { + return NFA.fromMatcher( + CollectionMatcher( + ranges = listOf(CollectionRange('A', 'Z')), + includesEOL = ctx.text.contains('_'), + forceNoIgnoreCase = true + ) + ) + } + + override fun visitNotUcase(ctx: RegexParser.NotUcaseContext): NFA { + return NFA.fromMatcher( + CollectionMatcher( + ranges = listOf(CollectionRange('A', 'Z')), + isNegated = true, + includesEOL = ctx.text.contains('_'), + forceNoIgnoreCase = true + ) + ) + } + + override fun visitEsc(ctx: RegexParser.EscContext?): NFA { + return NFA.fromMatcher( + CharacterMatcher('') + ) + } + + override fun visitTab(ctx: RegexParser.TabContext?): NFA { + return NFA.fromMatcher( + CharacterMatcher('\t') + ) + } + + override fun visitCR(ctx: RegexParser.CRContext?): NFA { + return NFA.fromMatcher( + CharacterMatcher('\r') + ) + } + + override fun visitBS(ctx: RegexParser.BSContext?): NFA { + return NFA.fromMatcher( + CharacterMatcher('\b') + ) + } + + override fun visitNL(ctx: RegexParser.NLContext?): NFA { + return NFA.fromMatcher( + CharacterMatcher('\n') + ) + } + + override fun visitCollectionPos(ctx: RegexParser.CollectionPosContext): NFA { + return visitCollection(ctx.collection_elems, false, ctx.COLLECTION_START().text.contains('_')) + } + + override fun visitCollectionNeg(ctx: RegexParser.CollectionNegContext): NFA { + return visitCollection(ctx.collection_elems, true, ctx.COLLECTION_START().text.contains('_')) + } + + private fun visitCollection(collectionElements: List, isNegated: Boolean, includesEOL: Boolean) : NFA { + val individualChars: HashSet = HashSet() + val ranges: ArrayList = ArrayList() + val charClasses: ArrayList<(Char) -> Boolean> = ArrayList() + val collectionElementVisitor = CollectionElementVisitor() + var containsEOL = false + + for (elem in collectionElements) { + val result = 
collectionElementVisitor.visit(elem) + containsEOL = containsEOL || result.second + val element = result.first + when (element) { + is CollectionElement.SingleCharacter -> { + hasUpperCase = hasUpperCase || element.char.isUpperCase() + individualChars.add(element.char) + } + is CollectionElement.CharacterRange -> ranges.add(CollectionRange(element.start, element.end)) + is CollectionElement.CharacterClassExpression -> charClasses.add(element.predicate) + } + } + + /** + * If the collection is empty, match literally with '[]', or '[^]' if negated + */ + if (individualChars.isEmpty() && ranges.isEmpty() && charClasses.isEmpty()) + return if (isNegated) NFA.fromMatcher(CharacterMatcher('[')) + .concatenate(NFA.fromMatcher(CharacterMatcher('^'))) + .concatenate(NFA.fromMatcher(CharacterMatcher(']'))) + else NFA.fromMatcher(CharacterMatcher('[')) + .concatenate(NFA.fromMatcher(CharacterMatcher(']'))) + + return NFA.fromMatcher( + CollectionMatcher( + individualChars, + ranges, + charClasses, + isNegated, + includesEOL || containsEOL + ) + ) + } + + override fun visitStartMatch(ctx: RegexParser.StartMatchContext?): NFA { + val nfa = NFA.fromSingleState() + nfa.startMatch() + return nfa + } + + override fun visitEndMatch(ctx: RegexParser.EndMatchContext?): NFA { + val nfa = NFA.fromSingleState() + nfa.endMatch() + return nfa + } + + override fun visitBackreference(ctx: RegexParser.BackreferenceContext): NFA { + return NFA.fromMatcher( + BackreferenceMatcher(ctx.text[1].digitToInt()) + ) + } + + override fun visitStartOfFile(ctx: RegexParser.StartOfFileContext?): NFA { + return NFA.fromMatcher(StartOfFileMatcher()) + } + + override fun visitEndOfFile(ctx: RegexParser.EndOfFileContext?): NFA { + return NFA.fromMatcher(EndOfFileMatcher()) + } + + override fun visitStartOfLine(ctx: RegexParser.StartOfLineContext?): NFA { + return NFA.fromMatcher(StartOfLineMatcher()) + } + + override fun visitEndOfLine(ctx: RegexParser.EndOfLineContext?): NFA { + return 
NFA.fromMatcher(EndOfLineMatcher()) + } + + override fun visitStartOfWord(ctx: RegexParser.StartOfWordContext?): NFA { + return NFA.fromMatcher(StartOfWordMatcher()) + } + + override fun visitEndOfWord(ctx: RegexParser.EndOfWordContext?): NFA { + return NFA.fromMatcher(EndOfWordMatcher()) + } + + override fun visitDecimalCode(ctx: RegexParser.DecimalCodeContext): NFA { + return NFA.fromMatcher( + CharacterMatcher( + Char((if (ctx.text[0] == '\\') ctx.text.substring(3) else ctx.text.substring(2)).toInt()) + ) + ) + } + + override fun visitOctalCode(ctx: RegexParser.OctalCodeContext): NFA { + val code = (if (ctx.text[0] == '\\') ctx.text.substring(3) else ctx.text.substring(2)).toInt(8) + + /** + * An octal code can only go up to 0o377. But the parser still allows codes like 0o400. For these cases, the actual + * code should be 0o40, and that is followed by a literal '0' + */ + return if (code > "377".toInt(8)) { + NFA.fromMatcher( + CharacterMatcher( + Char((if (ctx.text[0] == '\\') ctx.text.substring(3) else ctx.text.substring(2)).dropLast(1).toInt(8)) + ) + ).concatenate( + NFA.fromMatcher( + CharacterMatcher(ctx.text.last()) + ) + ) + } else { + NFA.fromMatcher( + CharacterMatcher( + Char(code) + ) + ) + } + } + + override fun visitHexCode(ctx: RegexParser.HexCodeContext): NFA { + return NFA.fromMatcher( + CharacterMatcher( + Char((if (ctx.text[0] == '\\') ctx.text.substring(3) else ctx.text.substring(2)).toInt(16)) + ) + ) + } + + override fun visitLine(ctx: RegexParser.LineContext): NFA { + return NFA.fromMatcher( + AtLineMatcher(ctx.text.substring(if (ctx.text[0] == '\\') 2 else 1, ctx.text.length - 1).toInt())) + } + + override fun visitBeforeLine(ctx: RegexParser.BeforeLineContext): NFA { + return NFA.fromMatcher( + BeforeLineMatcher(ctx.text.substring(if (ctx.text[0] == '\\') 3 else 2, ctx.text.length - 1).toInt()) + ) + } + + override fun visitAfterLine(ctx: RegexParser.AfterLineContext): NFA { + return NFA.fromMatcher( + 
AfterLineMatcher(ctx.text.substring(if (ctx.text[0] == '\\') 3 else 2, ctx.text.length - 1).toInt()) + ) + } + + override fun visitColumn(ctx: RegexParser.ColumnContext): NFA { + return NFA.fromMatcher( + AtColumnMatcher(ctx.text.substring(if (ctx.text[0] == '\\') 2 else 1, ctx.text.length - 1).toInt()) + ) + } + + override fun visitBeforeColumn(ctx: RegexParser.BeforeColumnContext): NFA { + return NFA.fromMatcher( + BeforeColumnMatcher(ctx.text.substring(if (ctx.text[0] == '\\') 3 else 2, ctx.text.length - 1).toInt()) + ) + } + + override fun visitAfterColumn(ctx: RegexParser.AfterColumnContext): NFA { + return NFA.fromMatcher( + AfterColumnMatcher(ctx.text.substring(if (ctx.text[0] == '\\') 3 else 2, ctx.text.length - 1).toInt()) + ) + } + + override fun visitLineCursor(ctx: RegexParser.LineCursorContext?): NFA { + return NFA.fromMatcher(AtLineCursorMatcher()) + } + + override fun visitBeforeLineCursor(ctx: RegexParser.BeforeLineCursorContext?): NFA { + return NFA.fromMatcher(BeforeLineCursorMatcher()) + } + + override fun visitAfterLineCursor(ctx: RegexParser.AfterLineCursorContext?): NFA { + return NFA.fromMatcher(AfterLineCursorMatcher()) + } + + override fun visitColumnCursor(ctx: RegexParser.ColumnCursorContext?): NFA { + return NFA.fromMatcher(AtColumnCursorMatcher()) + } + + override fun visitBeforeColumnCursor(ctx: RegexParser.BeforeColumnCursorContext?): NFA { + return NFA.fromMatcher(BeforeColumnCursorMatcher()) + } + + override fun visitAfterColumnCursor(ctx: RegexParser.AfterColumnCursorContext?): NFA { + return NFA.fromMatcher(AfterColumnCursorMatcher()) + } + + override fun visitOptionallyMatched(ctx: RegexParser.OptionallyMatchedContext): NFA { + if (ctx.atoms.isEmpty()) { return NFA.fromSingleState() } // TODO: Throw E70 error + + val nfa = NFA.fromSingleState() + for (atom in ctx.atoms) nfa.concatenate(visit(atom).optional(true)) + return nfa + } + + override fun visitVisual(ctx: RegexParser.VisualContext?): NFA { + return 
NFA.fromMatcher(VisualAreaMatcher()) + } + + override fun visitMark(ctx: RegexParser.MarkContext): NFA { + return NFA.fromMatcher( + AtMarkMatcher(ctx.text[if (ctx.text[0] == '\\') 3 else 2]) + ) + } + + override fun visitBeforeMark(ctx: RegexParser.BeforeMarkContext): NFA { + return NFA.fromMatcher( + BeforeMarkMatcher(ctx.text[if (ctx.text[0] == '\\') 4 else 3]) + ) + } + + override fun visitAfterMark(ctx: RegexParser.AfterMarkContext): NFA { + return NFA.fromMatcher( + AfterMarkMatcher(ctx.text[if (ctx.text[0] == '\\') 4 else 3]) + ) + } + + private fun cleanLiteralChar(str : String): Char { + return if (str.length == 2 && str[0] == '\\') { + hasUpperCase = hasUpperCase || str[1].isUpperCase() + str[1] + } + else { + hasUpperCase = hasUpperCase || str[0].isUpperCase() + str[0] + } + } + + private fun List.union(): NFA { + return this.foldIndexed(null as NFA?) { index, acc, elem -> + if (index == 0) elem + else acc?.unify(elem) ?: elem + } ?: NFA.fromSingleState() + } + + private fun List.concatenate(): NFA { + return this.foldIndexed(null as NFA?) 
{ index, acc, elem -> + if (index == 0) elem + else acc?.concatenate(elem) ?: elem + } ?: NFA.fromSingleState() + } +} \ No newline at end of file diff --git a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/vimscript/model/commands/MoveTextCommand.kt b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/vimscript/model/commands/MoveTextCommand.kt index 36649b5dc4..e0049d0f95 100644 --- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/vimscript/model/commands/MoveTextCommand.kt +++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/vimscript/model/commands/MoveTextCommand.kt @@ -130,16 +130,16 @@ public data class MoveTextCommand(val ranges: Ranges, val argument: String) : Co ) { var newStartPosition = selectionInfo.start if (startOffset != null && selectionInfo.start != null && range.contains(startOffset)) { - newStartPosition = shiftBufferPosition(selectionInfo.start, shift) + newStartPosition = shiftBufferPosition(selectionInfo.start!!, shift) } var newEndPosition = selectionInfo.end if (endOffset != null && selectionInfo.end != null && range.contains(endOffset)) { - newEndPosition = shiftBufferPosition(selectionInfo.end, shift) + newEndPosition = shiftBufferPosition(selectionInfo.end!!, shift) } if (newStartPosition != selectionInfo.start || newEndPosition != selectionInfo.end) { - caret.lastSelectionInfo = SelectionInfo(newStartPosition, newEndPosition, selectionInfo.type) + caret.lastSelectionInfo = SelectionInfo(newStartPosition, newEndPosition, selectionInfo.selectionType) } } diff --git a/vim-engine/src/test/kotlin/com/maddyhome/idea/vim/regexp/VimRegexTestUtils.kt b/vim-engine/src/test/kotlin/com/maddyhome/idea/vim/regexp/VimRegexTestUtils.kt new file mode 100644 index 0000000000..749b628ff0 --- /dev/null +++ b/vim-engine/src/test/kotlin/com/maddyhome/idea/vim/regexp/VimRegexTestUtils.kt @@ -0,0 +1,240 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the 
LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.regexp + +import com.maddyhome.idea.vim.api.BufferPosition +import com.maddyhome.idea.vim.api.LocalMarkStorage +import com.maddyhome.idea.vim.api.VimCaret +import com.maddyhome.idea.vim.api.VimEditor +import com.maddyhome.idea.vim.common.Offset +import com.maddyhome.idea.vim.common.TextRange +import com.maddyhome.idea.vim.mark.VimMark +import org.mockito.ArgumentMatchers.anyInt +import org.mockito.Mockito +import org.mockito.kotlin.whenever +import kotlin.test.assertNotEquals +import kotlin.test.fail + +internal object VimRegexTestUtils { + + const val START: String = "" + const val END: String = "" + const val CARET: String = "" + const val VISUAL_START = "" + const val VISUAL_END = "" + private const val MARK = "" + fun MARK(mark: Char): CharSequence { return "" } + + fun mockEditorFromText(text: CharSequence) : VimEditor { + val cleanText = getTextWithoutEditorTags(getTextWithoutRangeTags(text)) + val lines = cleanText.split("\n").map { it + "\n" } + + val editorMock = Mockito.mock() + mockEditorText(editorMock, cleanText) + mockEditorOffsetToBufferPosition(editorMock, lines) + mockEditorBufferPositionToOffset(editorMock, lines) + mockEditorLineStartOffset(editorMock) + mockEditorLineEndOffset(editorMock, lines) + mockEditorLineCount(editorMock, lines) + + val textWithoutRangeTags = getTextWithoutRangeTags(text) + + val carets = mutableListOf() + val textWithOnlyCarets = getTextWithoutVisualTags(getTextWithoutMarkTags(textWithoutRangeTags)) + val textWithOnlyVisuals = getTextWithoutCaretTags(getTextWithoutMarkTags(textWithoutRangeTags)) + val textWithOnlyMarks = getTextWithoutCaretTags(getTextWithoutVisualTags(textWithoutRangeTags)) + + val visualStart = textWithOnlyVisuals.indexOf(VISUAL_START) + val visualEnd = if (visualStart >= 0) textWithOnlyVisuals.indexOf(VISUAL_END) - VISUAL_START.length + else -1 + + val marks = mutableMapOf() + + var nextMark = 
MARK.toRegex().find(textWithOnlyMarks) + var offset = 0 + while (nextMark != null) { + val nextMarkIndex = nextMark.range.first - offset + offset += MARK.length + marks[nextMark.value[5]] = editorMock.offsetToBufferPosition(nextMarkIndex) + nextMark = nextMark.next() + } + + var nextCaretIndex = textWithOnlyCarets.indexOf(CARET) + offset = 0 + + while (nextCaretIndex != -1) { + carets.add(mockCaret(nextCaretIndex - offset, Pair(visualStart, visualEnd), marks)) + nextCaretIndex = textWithOnlyCarets.indexOf(CARET, nextCaretIndex + CARET.length) + offset += CARET.length + } + + if (carets.isEmpty()) { + // if no carets are provided, place one at the start of the text + val caret = mockCaret(0, Pair(visualStart, visualEnd), marks) + whenever(editorMock.carets()).thenReturn(listOf(caret)) + whenever(editorMock.currentCaret()).thenReturn(caret) + } else { + whenever(editorMock.carets()).thenReturn(carets) + whenever(editorMock.currentCaret()).thenReturn(carets.first()) + } + + return editorMock + } + + fun mockEditor(text: CharSequence, carets: List): VimEditor { + assertNotEquals(0, carets.size, "Expected at least one caret.") + val cleanText = getTextWithoutEditorTags(getTextWithoutRangeTags(text)) + val lines = cleanText.split("\n").map { it + "\n" } + + val editorMock = Mockito.mock() + mockEditorText(editorMock, cleanText) + mockEditorOffsetToBufferPosition(editorMock, lines) + mockEditorBufferPositionToOffset(editorMock, lines) + mockEditorLineStartOffset(editorMock) + mockEditorLineEndOffset(editorMock, lines) + mockEditorLineCount(editorMock, lines) + whenever(editorMock.carets()).thenReturn(carets) + whenever(editorMock.currentCaret()).thenReturn(carets.first()) + + return editorMock + } + + fun mockCaret(caretOffset: Int, visualOffset: Pair = Pair(-1, -1), marks: Map = emptyMap()): VimCaret { + val caretMock = Mockito.mock() + whenever(caretMock.offset).thenReturn(Offset(caretOffset)) + whenever(caretMock.selectionStart).thenReturn(visualOffset.first) + 
whenever(caretMock.selectionEnd).thenReturn(visualOffset.second) + val markStorage = mockMarkStorage(marks) + whenever(caretMock.markStorage).thenReturn(markStorage) + + return caretMock + } + + private fun mockMarkStorage(marks: Map): LocalMarkStorage { + val markStorage = Mockito.mock() + whenever(markStorage.getMark(Mockito.anyChar())).thenAnswer { invocation -> + val key = invocation.arguments[0] as Char + val position = marks[key] ?: return@thenAnswer null + VimMark(key, position.line, position.column, "", "") + } + return markStorage + } + + private fun getTextWithoutCaretTags(text: CharSequence): CharSequence { + return text.replace(CARET.toRegex(), "") + } + + private fun getTextWithoutVisualTags(text: CharSequence): CharSequence { + return text.replace("$VISUAL_START|$VISUAL_END".toRegex(), "") + } + + private fun getTextWithoutMarkTags(text: CharSequence): CharSequence { + return text.replace(MARK.toRegex(), "") + } + + private fun getTextWithoutEditorTags(text: CharSequence): CharSequence { + return getTextWithoutMarkTags( + getTextWithoutVisualTags( + getTextWithoutCaretTags( + text + ) + ) + ) + } + + private fun mockEditorText(editor: VimEditor, text: CharSequence) { + whenever(editor.text()).thenReturn(text) + } + + fun getMatchRanges(text: CharSequence): List { + val textWithoutEditorTags = getTextWithoutEditorTags(text) + val matchRanges = mutableListOf() + var offset = 0 + var oldOffset = 0 + + var startIndex = textWithoutEditorTags.indexOf(START) + while (startIndex != -1) { + val endIndex = textWithoutEditorTags.indexOf(END, startIndex + START.length) + if (endIndex != -1) { + offset += START.length + matchRanges.add(TextRange(startIndex - oldOffset, endIndex - offset)) + startIndex = textWithoutEditorTags.indexOf(START, endIndex + END.length) + offset += END.length + oldOffset = offset + } else { + fail("Please provide the same number of START and END tags!") + } + } + return matchRanges + } + + private fun getTextWithoutRangeTags(text: 
CharSequence): CharSequence { + val newText = StringBuilder(text) + var index = newText.indexOf(START) + while (index != -1) { + newText.delete(index, index + START.length) + index = newText.indexOf(START, index) + } + + index = newText.indexOf(END) + while (index != -1) { + newText.delete(index, index + END.length) + index = newText.indexOf(END, index) + } + + return newText + } + + private fun mockEditorOffsetToBufferPosition(editor: VimEditor, lines: List) { + whenever(editor.offsetToBufferPosition(Mockito.anyInt())).thenAnswer { invocation -> + val offset = invocation.arguments[0] as Int + var lineCounter = 0 + var currentOffset = 0 + + while (lineCounter < lines.size && currentOffset + lines[lineCounter].length <= offset) { + currentOffset += lines[lineCounter].length + lineCounter++ + } + + if (lineCounter < lines.size) { + val column = offset - currentOffset + BufferPosition(lineCounter, column) + } else { + BufferPosition(-1, -1) + } + } + } + + private fun mockEditorBufferPositionToOffset(editor: VimEditor, lines: List) { + whenever(editor.bufferPositionToOffset(any(BufferPosition::class.java))).thenAnswer { invocation -> + val position = invocation.arguments[0] as BufferPosition + return@thenAnswer lines.subList(0, position.line).sumOf { it.length } + position.column + } + } + + private fun mockEditorLineStartOffset(editor: VimEditor) { + whenever(editor.getLineStartOffset(anyInt())).thenAnswer { invocation -> + val line = invocation.arguments[0] as Int + editor.bufferPositionToOffset(BufferPosition(line, 0)) + } + } + + private fun mockEditorLineEndOffset(editor: VimEditor, lines: List) { + whenever(editor.getLineEndOffset(anyInt())).thenAnswer { invocation -> + val line = invocation.arguments[0] as Int + editor.bufferPositionToOffset(BufferPosition(line, lines[line].length)) + } + } + + private fun mockEditorLineCount(editor: VimEditor, lines: List) { + whenever(editor.lineCount()).thenReturn(lines.size) + } + + private fun any(type: Class): T = 
Mockito.any(type) +} \ No newline at end of file diff --git a/vim-engine/src/test/kotlin/com/maddyhome/idea/vim/regexp/api/VimRegexTest.kt b/vim-engine/src/test/kotlin/com/maddyhome/idea/vim/regexp/api/VimRegexTest.kt new file mode 100644 index 0000000000..bc3e72d91f --- /dev/null +++ b/vim-engine/src/test/kotlin/com/maddyhome/idea/vim/regexp/api/VimRegexTest.kt @@ -0,0 +1,452 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.regexp.api + +import com.maddyhome.idea.vim.regexp.VimRegex +import com.maddyhome.idea.vim.regexp.VimRegexOptions +import com.maddyhome.idea.vim.regexp.VimRegexTestUtils.END +import com.maddyhome.idea.vim.regexp.VimRegexTestUtils.START +import com.maddyhome.idea.vim.regexp.VimRegexTestUtils.getMatchRanges +import com.maddyhome.idea.vim.regexp.VimRegexTestUtils.mockEditorFromText +import com.maddyhome.idea.vim.regexp.match.VimMatchResult +import org.junit.jupiter.api.Nested +import org.junit.jupiter.api.Test +import kotlin.test.assertEquals +import kotlin.test.fail + +class VimRegexTest { + @Nested + inner class ContainsMatchInTest { + @Test + fun `test single word contains match in editor`() { + doTest( + """ + |Lorem Ipsum + | + |Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "dolor", + true + ) + } + + @Test + fun `test single word does not contain match in editor`() { + doTest( + """ + |Lorem Ipsum + | + |Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. 
+ """.trimMargin(), + "IdeaVim", + false + ) + } + + private fun doTest( + text: CharSequence, + pattern: String, expectedResult : Boolean + ) { + val editor = mockEditorFromText(text) + val regex = VimRegex(pattern) + val matchResult = regex.containsMatchIn(editor) + assertEquals(expectedResult, matchResult) + } + } + + @Nested + inner class FindNextTest { + @Test + fun `test find single word starting at beginning`() { + doTest( + """ + |Lorem Ipsum + | + |${START}Lorem${END} ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "Lorem", + ) + } + + @Test + fun `test find single word wraps around`() { + doTest( + """ + |${START}Lorem${END} Ipsum + | + |Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "Lorem", + 40, + listOf(VimRegexOptions.WRAP_SCAN) + ) + } + + @Test + fun `test find single word doesn't wrap around`() { + assertFailure( + """ + |Lorem Ipsum + | + |Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "Lorem", + 40 + ) + } + + @Test + fun `test find word case insensitive prevails`() { + doTest( + """ + |Lorem Ipsum + | + |${START}Lorem${END} ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. 
+ """.trimMargin(), + "\\ClO\\crEm\\C" + ) + } + + private fun doTest( + text: CharSequence, + pattern: String, + startIndex: Int = 0, + options: List = emptyList() + ) { + val editor = mockEditorFromText(text) + val regex = VimRegex(pattern) + val matchResult = regex.findNext(editor, startIndex, options) + when (matchResult) { + is VimMatchResult.Failure -> fail("Expected to find match") + is VimMatchResult.Success -> assertEquals(getMatchRanges(text).firstOrNull(), matchResult.range) + } + } + + private fun assertFailure( + text: CharSequence, + pattern: String, + startIndex: Int = 0, + options: List = emptyList() + ) { + val editor = mockEditorFromText(text) + val regex = VimRegex(pattern) + val matchResult = regex.findNext(editor, startIndex, options) + if (matchResult is VimMatchResult.Success) fail("Expected to not find any match, but one was found at ${matchResult.range}") + } + } + + @Nested + inner class FindPreviousTest { + @Test + fun `test find previous single word starting from offset`() { + doTest( + """ + |${START}Lorem${END} Ipsum + | + |Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "Lorem", + 1 + ) + } + + @Test + fun `test find previous single word warps around`() { + doTest( + """ + |Lorem Ipsum + | + |${START}Lorem${END} ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "Lorem", + options = listOf(VimRegexOptions.WRAP_SCAN) + ) + } + + @Test + fun `test find previous single word doesn't warp around`() { + assertFailure( + """ + |Lorem Ipsum + | + |Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. 
+ """.trimMargin(), + "Lorem" + ) + } + + private fun doTest( + text: CharSequence, + pattern: String, + startIndex: Int = 0, + options: List = emptyList() + ) { + val editor = mockEditorFromText(text) + val regex = VimRegex(pattern) + val matchResult = regex.findPrevious(editor, startIndex, options) + when (matchResult) { + is VimMatchResult.Failure -> fail("Expected to find match") + is VimMatchResult.Success -> assertEquals(getMatchRanges(text).firstOrNull(), matchResult.range) + } + } + + private fun assertFailure( + text: CharSequence, + pattern: String, + startIndex: Int = 0, + options: List = emptyList() + ) { + val editor = mockEditorFromText(text) + val regex = VimRegex(pattern) + val matchResult = regex.findPrevious(editor, startIndex, options) + if (matchResult is VimMatchResult.Success) fail("Expected to not find any match, but one was found at ${matchResult.range}") + } + } + + @Nested + inner class FindAllTest { + @Test + fun `test find all occurrences of word`() { + doTest( + """ + |${START}Lorem${END} Ipsum + | + |${START}Lorem${END} ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "Lorem" + ) + } + + @Test + fun `test find all occurrences of word from offset`() { + doTest( + """ + |Lorem Ipsum + | + |${START}Lorem${END} ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "Lorem", + 10 + ) + } + + @Test + fun `test find all occurrences of word case insensitive`() { + doTest( + """ + |${START}Lorem${END} Ipsum + | + |${START}Lorem${END} ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. 
+ """.trimMargin(), + "lorem\\c", + ) + } + + @Test + fun `test find all occurrences of word with smartcase ignores case`() { + doTest( + """ + |${START}Lorem Ipsum${END} + | + |${START}Lorem ipsum${END} dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "lorem ipsum", + options = listOf(VimRegexOptions.IGNORE_CASE, VimRegexOptions.SMART_CASE) + ) + } + + @Test + fun `test find all occurrences of word with smartcase doesn't ignore case`() { + doTest( + """ + |${START}Lorem Ipsum${END} + | + |Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "Lorem Ipsum", + options = listOf(VimRegexOptions.IGNORE_CASE, VimRegexOptions.SMART_CASE) + ) + } + + private fun doTest( + text: CharSequence, + pattern: String, + startIndex: Int = 0, + options: List = emptyList() + ) { + val editor = mockEditorFromText(text) + val regex = VimRegex(pattern) + val matchResults = regex.findAll(editor, startIndex, options=options) + assertEquals( + getMatchRanges(text).toSet(), matchResults + .map { it.range } + .toSet() + ) + } + } + + @Nested + inner class MatchAtTest { + @Test + fun `test word matches at index`() { + doTest( + """ + |Lorem Ipsum + | + |${START}Lorem${END} ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "Lorem", + 13, + ) + } + + @Test + fun `test word does not match at index`() { + assertFailure( + """ + |Lorem Ipsum + | + |Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. 
+ """.trimMargin(), + "Lorem", + 12 + ) + } + + private fun doTest( + text: CharSequence, + pattern: String, + index: Int + ) { + val editor = mockEditorFromText(text) + val regex = VimRegex(pattern) + val matchResult = regex.matchAt(editor, index) + when (matchResult) { + is VimMatchResult.Success -> assertEquals(getMatchRanges(text).firstOrNull(), matchResult.range) + is VimMatchResult.Failure -> fail("Expected to find match.") + } + } + + private fun assertFailure( + text: CharSequence, + pattern: String, + index: Int + ) { + val editor = mockEditorFromText(text) + val regex = VimRegex(pattern) + val matchResult = regex.matchAt(editor, index) + if (matchResult is VimMatchResult.Success) + fail("Expected to not find any matches but instead found match at ${matchResult.range}") + } + } + + @Nested + inner class MatchEntireTest { + @Test + fun `test pattern matches entire editor`() { + doTest( + """ + |${START}Lorem Ipsum + | + |Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas.${END} + """.trimMargin(), + "\\_.*", + ) + } + + @Test + fun `test pattern matches string only partially`() { + assertFailure( + """ + |Lorem Ipsum + | + |Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. 
+ """.trimMargin(), + "Lorem", + ) + } + + private fun doTest( + text: CharSequence, + pattern: String) { + val editor = mockEditorFromText(text) + val regex = VimRegex(pattern) + val matchResult = regex.matchEntire(editor) + when (matchResult) { + is VimMatchResult.Success -> assertEquals(getMatchRanges(text).firstOrNull(), matchResult.range) + is VimMatchResult.Failure -> fail("Expected to find match.") + } + } + + private fun assertFailure( + text: CharSequence, + pattern: String + ) { + val editor = mockEditorFromText(text) + val regex = VimRegex(pattern) + val matchResult = regex.matchEntire(editor) + if (matchResult is VimMatchResult.Success) + fail("Expected to not find any matches but instead found match at ${matchResult.range}") + } + } +} \ No newline at end of file diff --git a/vim-engine/src/test/kotlin/com/maddyhome/idea/vim/regexp/internal/VimRegexEngineTest.kt b/vim-engine/src/test/kotlin/com/maddyhome/idea/vim/regexp/internal/VimRegexEngineTest.kt new file mode 100644 index 0000000000..50deee352c --- /dev/null +++ b/vim-engine/src/test/kotlin/com/maddyhome/idea/vim/regexp/internal/VimRegexEngineTest.kt @@ -0,0 +1,1704 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. 
+ */ + +package com.maddyhome.idea.vim.regexp.internal + +import com.maddyhome.idea.vim.api.BufferPosition +import com.maddyhome.idea.vim.api.VimCaret +import com.maddyhome.idea.vim.regexp.VimRegexTestUtils.mockEditorFromText +import com.maddyhome.idea.vim.regexp.VimRegexTestUtils.CARET +import com.maddyhome.idea.vim.regexp.VimRegexTestUtils.START +import com.maddyhome.idea.vim.regexp.VimRegexTestUtils.END +import com.maddyhome.idea.vim.regexp.VimRegexTestUtils.MARK +import com.maddyhome.idea.vim.regexp.VimRegexTestUtils.VISUAL_END +import com.maddyhome.idea.vim.regexp.VimRegexTestUtils.VISUAL_START +import com.maddyhome.idea.vim.regexp.VimRegexTestUtils.getMatchRanges +import com.maddyhome.idea.vim.regexp.VimRegexTestUtils.mockCaret +import com.maddyhome.idea.vim.regexp.VimRegexTestUtils.mockEditor +import com.maddyhome.idea.vim.regexp.match.VimMatchResult +import com.maddyhome.idea.vim.regexp.engine.nfa.NFA +import com.maddyhome.idea.vim.regexp.engine.VimRegexEngine +import com.maddyhome.idea.vim.regexp.engine.nfa.matcher.DotMatcher +import com.maddyhome.idea.vim.regexp.parser.VimRegexParser +import com.maddyhome.idea.vim.regexp.parser.VimRegexParserResult +import com.maddyhome.idea.vim.regexp.parser.visitors.PatternVisitor +import org.junit.jupiter.api.Test +import kotlin.test.assertEquals +import kotlin.test.assertTrue +import kotlin.test.fail + +class VimRegexEngineTest { + @Test + fun `test match not found`() { + assertFailure( + """ + |Lorem Ipsum + | + |Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "VIM" + ) + } + + @Test + fun `test concatenation from start`() { + doTest( + """ + |${START}Lorem$END Ipsum + | + |Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. 
+ """.trimMargin(), + "Lorem", + ) + } + + @Test + fun `test concatenation from offset`() { + doTest( + """ + |Lorem Ipsum + | + |${START}Lorem$END ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "Lorem", + 13 + ) + } + + @Test + fun `test concatenation with escaped char`() { + doTest( + "${START}a*${END}bcd", + "a\\*", + ) + } + + @Test + fun `test star multi`() { + doTest( + "${START}aaaaa${END}bcd", + "a*", + ) + } + + @Test + fun `test star multi empty match`() { + doTest( + "$START${END}bcd", + "a*", + ) + } + + @Test + fun `test plus multi`() { + doTest( + "${START}aaaaa${END}bcd", + "a\\+", + ) + } + + @Test + fun `test plus multi should fail`() { + assertFailure( + "bcd", + "a\\+" + ) + } + + @Test + fun `test range multi both bounds`() { + doTest( + "${START}aaa${END}aabcd", + "a\\{0,3}", + ) + } + + @Test + fun `test range multi lower bound`() { + doTest( + "${START}aaaaa${END}bcd", + "a\\{2,}", + ) + } + + @Test + fun `test range multi upper bound`() { + doTest( + "${START}aa${END}aaabcd", + "a\\{,2}", + ) + } + + @Test + fun `test range unbounded`() { + doTest( + "${START}aaaaa${END}bcd", + "a\\{}", + ) + } + + @Test + fun `test range unbounded with comma`() { + doTest( + "${START}aaaaa${END}bcd", + "a\\{,}", + ) + } + + @Test + fun `test range absolute bound`() { + doTest( + "${START}aa${END}aaabcd", + "a\\{2}", + ) + } + + @Test + fun `test range should fail`() { + assertFailure( + "aaaaabcd", + "a\\{6,}" + ) + } + + @Test + fun `test group`() { + doTest( + "${START}Lorem$END Ipsum", + "\\v(Lorem)", + ) + } + + @Test + fun `test group followed by word`() { + doTest( + "${START}Lorem Ipsum$END", + "\\v(Lorem) Ipsum", + ) + } + + @Test + fun `test capture group 1`() { + doTest( + "${START}Lorem$END Ipsum", + "\\v(Lorem) Ipsum", + groupNumber = 1 + ) + } + + @Test + fun `test capture group 2`() { + doTest( + "Lorem ${START}Ipsum$END", + "\\v(Lorem) (Ipsum)", + 
groupNumber = 2 + ) + } + + @Test + fun `test group updates range`() { + doTest( + "abab${START}ab${END}c", + "\\v(ab)*c", + groupNumber = 1 + ) + } + + @Test + fun `test empty group`() { + doTest( + "$START${END}Lorem Ipsum", + "\\v()", + ) + } + + @Test + fun `test alternation with star multi`() { + doTest( + "${START}abc$END", + "\\v%(a|b)*c", + ) + } + + @Test + fun `test star multi has to backtrack`() { + doTest( + "${START}a$END", + "a*a", + ) + } + + @Test + fun `test multiple paths to loop`() { + doTest( + "${START}ababc$END", + "\\v(a|b)+c=", + ) + } + + @Test + fun `test nested multi`() { + doTest( + "${START}aaaa$END", + "\\v(a=)*", + ) + } + + @Test + fun `test nested multi madness`() { + doTest( + "${START}acabcdabcacd$END", + "\\v((ab=c+)+d)*", + ) + } + + @Test + fun `test lazy multi doesn't consume anything`() { + doTest( + "$START${END}aaaaa", + "a\\{-}", + ) + } + + @Test + fun `test closest matching quotes`() { + doTest( + """ + |$START"Lorem"$END "Ipsum" + """.trimMargin(), + "\".\\{-}\"", + ) + } + + @Test + fun `test farthest matching quotes`() { + doTest( + """ + |$START"Lorem" "Ipsum"$END + """.trimMargin(), + "\".\\{}\"", + ) + } + + @Test + fun `text sequence of any characters`() { + doTest( + """ + |${START}Lorem Ipsum$END + | + |Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + ".*", + ) + } + + @Test + fun `test sequence of any characters with newline`() { + doTest( + """ + |${START}Lorem Ipsum + | + |Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. 
+ |Cras id tellus in ex imperdiet egestas.$END + """.trimMargin(), + "\\_.*", + ) + } + + @Test + fun `test single cursor`() { + doTest( + "${START}Lo${CARET}rem$END Ipsum", + "Lo\\%#rem", + ) + } + + @Test + fun `test single cursor should fail`() { + assertFailure( + "Lo${CARET}rem Ipsum", + "\\%#Lorem" + ) + } + + @Test + fun `test words separated by spaces`() { + doTest( + "${START}Lorem \t Ipsum$END", + "\\v\\w+\\s+\\w+", + ) + } + + @Test + fun `test date format 1`() { + doTest( + "${START}08-08-2023$END", + "\\v\\d{2}%(-|/)\\d{2}%(-|/)%(\\d{4}|\\d{2})", + ) + } + + @Test + fun `test date format 2`() { + doTest( + "${START}08/08/2023$END", + "\\v\\d{2}%(-|/)\\d{2}%(-|/)%(\\d{4}|\\d{2})", + ) + } + + @Test + fun `test date format 3`() { + doTest( + "${START}08/08/23$END", + "\\v\\d{2}%(-|/)\\d{2}%(-|/)%(\\d{4}|\\d{2})", + ) + } + + @Test + fun `test hexadecimal number 1`() { + doTest( + "${START}0x193ab3f$END is a hexadecimal number", + "\\v%(0x)?\\x+", + ) + } + + @Test + fun `test hexadecimal number 2`() { + doTest( + "${START}abcdef23901a$END is also a hexadecimal number", + "\\v%(0x)?\\x+", + ) + } + + @Test + fun `test name surname`() { + doTest( + "${START}Emanuel Gestosa$END", + "\\v\\u\\l+\\s+\\u\\l+", + ) + } + + @Test + fun `test name surname invalid`() { + assertFailure( + "EmaNuel gestosa", + "\\v\\u\\l+\\s+\\u\\l+" + ) + } + + @Test + fun `test sequence of digits`() { + doTest( + "${START}45135${END}abc235", + "\\d\\+", + ) + } + + @Test + fun `test sequence of not digits`() { + doTest( + "${START}abcd${END}123efg", + "\\D\\+", + ) + } + + @Test + fun `test empty collection`() { + doTest( + "$START[]${END}abc", + "[]", + ) + } + + @Test + fun `test empty negated collection`() { + doTest( + "$START[^]${END}abc", + "[^]", + ) + } + + @Test + fun `test collection a to z and 0`() { + doTest( + "${START}abcd0efg${END}1hij", + "[a-z0]\\+", + ) + } + + @Test + fun `test collection a to z and 0 negated`() { + doTest( + "${START}ABCD${END}0EFG1HIJ", + 
"[^a-z0]\\+", + ) + } + + @Test + fun `test collection dash and a to z`() { + doTest( + "${START}a-b-c-d${END}_f-g", + "[-a-z]\\+", + ) + } + + @Test + fun `test collection a, dash and z`() { + doTest( + "${START}az-${END}e", + "[a\\-z]\\+", + ) + } + + @Test + fun `test collection backslash and a`() { + doTest( + "$START\\aa\\${END}bc", + "[\\a]\\+", + ) + } + + @Test + fun `test collection unicode a to unicode z`() { + doTest( + "${START}abcdf${END}123", + "[\\u61-\\u007a]\\+", + ) + } + + @Test + fun `test collection backslash, u and z`() { + doTest( + "${START}uz\\zu${END}abc", + "[\\uz]\\+", + ) + } + + @Test + fun `test set start of match`() { + doTest( + "end${START}if$END", + "end\\zsif", + ) + } + + @Test + fun `test set end of match`() { + doTest( + "${START}end${END}if", + "end\\zeif", + ) + } + + @Test + fun `test set multiple start of match`() { + doTest( + "end${START}if$END", + "\\zse\\zsn\\zsd\\zsif", + ) + } + + @Test + fun `test set multiple end of match`() { + doTest( + "${START}end${END}if", + "\\zee\\zen\\zed\\zeif", + ) + } + + @Test + fun `test set match start after set match end`() { + doTest( + "end${START}if$END", + "\\zeend\\zsif", + ) + } + + @Test + fun `test backreference to group 1`() { + doTest( + "${START}cat cat$END", + "\\v(dog|cat) \\1", + ) + } + + @Test + fun `test backreference should fail`() { + assertFailure( + "dog cat", + "\\v(dog|cat) \\1" + ) + } + + @Test + fun `test backreference to un-captured group`() { + doTest( + "$START${END}aaa", + "\\v(b)*\\1", + ) + } + + @Test + fun `test back-referenced group value updates`() { + doTest( + "${START}aaabb$END", + "\\v(a|b){1,100}\\1", + ) + } + + @Test + fun `test capturing inner nested group`() { + doTest( + "abaa${START}b${END}b", + "\\v(a(b)?)+", + groupNumber = 2 + ) + } + + @Test + fun `test case insensitive word`() { + doTest( + "${START}IdeaVim$END", + "ideavim", + ignoreCase = true + ) + } + + @Test + fun `test case insensitive collection`() { + doTest( + 
"${START}IdeaVim$END", + "[a-z]\\+", + ignoreCase = true + ) + } + + @Test + fun `test character classes never ignore case`() { + assertFailure( + "IDEAVIM", + "\\l\\+", + ignoreCase = true + ) + } + + @Test + fun `test start of file`() { + doTest( + "${START}Idea${END}Vim", + "\\%^Idea", + ) + } + @Test + fun `test start of file should fail`() { + assertFailure( + "IdeaVim", + "\\%^Vim", + ) + } + + @Test + fun `test end of file`() { + doTest( + "Idea${START}Vim$END", + "Vim\\%$", + ) + } + + @Test + fun `test end of file should fail`() { + assertFailure( + "IdeaVim", + "Idea\\%$" + ) + } + + @Test + fun `test start and end of file`() { + doTest( + "${START}IdeaVim$END", + "\\%^IdeaVim\\%$", + ) + } + + @Test + fun `test for empty file`() { + doTest( + "$START$END", + "\\v%^%$", + ) + } + + @Test + fun `test for empty file should fail`() { + assertFailure( + "IdeaVim", + "\\v%^%$" + ) + } + + @Test + fun `test start of line`() { + doTest( + """ + |Lorem Ipsum + | + |${START}Lorem$END ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "^Lorem", + 1 + ) + } + + @Test + fun `test start of line should fail`() { + assertFailure( + """ + |Lorem Ipsum + | + |Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "^Ipsum", + ) + } + + @Test + fun `test end of line`() { + doTest( + """ + |Lorem ${START}Ipsum$END + | + |Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "Ipsum$", + ) + } + + @Test + fun `test end of line should fail`() { + assertFailure( + """ + |Lorem Ipsum + | + |Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. 
+ """.trimMargin(), + "Lorem$" + ) + } + + @Test + fun `test start of line after alternation`() { + doTest( + "${START}dog$END barks", + "^cat\\|^dog", + ) + } + + @Test + fun `test end of line before alternation`() { + doTest( + """ + |"${START}cat$END + |"meows" + """.trimMargin(), + "cat$\\|dog$", + ) + } + + @Test + fun `test start and end of line inside parenthesis`() { + doTest( + "${START}cat meows$END", + "\\v(^(cat|dog)) ((meows|barks)$)", + ) + } + + @Test + fun `test caret is taken literally`() { + doTest( + "${START}the symbol ^ is known as caret.$END", + "^.\\+^.\\+$", + ) + } + + @Test + fun `test dollar sign is taken literally`() { + doTest( + "${START}the symbol for the dollar is $.$END", + "^.\\+$.\\+$", + ) + } + + @Test + fun `test caret is taken literally at the start of pattern`() { + doTest( + "$START^ is known$END as caret.", + "\\^ is known", + ) + } + + @Test + fun `test dollar sign is taken literally at the end of pattern`() { + doTest( + "the symbol for the ${START}dollar is $$END.", + "dollar is \\$", + ) + } + + @Test + fun `test start of line anywhere in pattern`() { + doTest( + """ + |${START}Lorem Ipsum Lorem + | + |Lorem$END ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "\\_.\\+\\_^Lorem", + ) + } + + @Test + fun `test end of line anywhere in pattern`() { + doTest( + """ + |${START}Lorem Ipsum + | + |${END}Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. 
+ """.trimMargin(), + "Lorem Ipsum\\_$\\_s*", + ) + } + + @Test + fun `test atomic group 1`() { + doTest( + "${START}aaab$END", + "\\(a*\\)\\@>b", + ) + } + + @Test + fun `test atomic group 2`() { + doTest( + "${START}Lorem$END Ipsum", + "\\v.*(Lorem)@>", + ) + } + + @Test + fun `test atomic group should fail`() { + /** + * This pattern should fail because the "a*" consumes + * all three 'a's, then the last "a" in the pattern + * fails to match since all 'a's have been consumed. + * Normally, it would try to backtrack and the "a*" + * would only consume two 'a's, leaving the last one to + * match with "a", but since the "a*" is atomic, it can't + * try matching with shorter or longer sub-matches, + * therefore the simulation immediately fails. + */ + assertFailure( + "aaa", + "\\(a*\\)\\@>a" + ) + } + + @Test + fun `test collection with EOL`() { + doTest( + """ + |${START}Lorem Ipsum + | + |${END}123Lorem ipsum dolor sit amet, Lorem + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "\\_[a-z A-Z]\\+", + ) + } + + @Test + fun `test negated collection with EOL includes EOL anyway`() { + doTest( + """ + |${START}Lorem Ipsum + | + |${END}123Lorem ipsum dolor sit amet, Lorem + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. 
+ """.trimMargin(), + "\\_[^0-9]\\+", + ) + } + + @Test + fun `test collection decimal range`() { + doTest( + "${START}Lorem$END Ipsum", + "[\\d65-\\d122]*", + ) + } + + @Test + fun `test collection octal range`() { + doTest( + "${START}Lorem$END Ipsum", + "[\\o101-\\o172]*", + ) + } + + @Test + fun `test collection hex range`() { + doTest( + "${START}Lorem$END Ipsum", + "[\\x41-\\x7a]*", + ) + } + + @Test + fun `test collection unicode range`() { + doTest( + "${START}Lorem$END Ipsum", + "[\\u0041-\\u007a]*", + ) + } + + @Test + fun `test collection wide unicode range`() { + doTest( + "${START}Lorem$END Ipsum", + "[\\U00000041-\\U007a]*", + ) + } + + @Test + fun `test collection with escaped new line`() { + doTest( + "${START}Lorem Ipsum\n" + + "Lorem ${END}123", + "[\\n a-zA-Z]*", + ) + } + + @Test + fun `test collection with character class expression`() { + doTest( + "${START}Lorem$END Ipsum", + "[[:upper:][:lower:]]*", + ) + } + + @Test + fun `test collection with character class expression, range and single elements`() { + doTest( + "$START/unix/file/path/../path/.$END", + "[-./[:alpha:]0-9_~]\\+", + ) + } + + @Test + fun `test positive lookahead 1`() { + doTest( + "${START}Lorem$END Ipsum", + "Lorem\\( Ipsum\\)\\@=", + ) + } + + @Test + fun `test positive lookahead 2`() { + doTest( + "${START}Lorem Ipsum$END", + "Lorem\\( Ipsum\\)\\@= Ipsum", + ) + } + + @Test + fun `test positive lookahead 3`() { + doTest( + "${START}Lorem$END Ipsum", + "\\vLorem( Ipsum)@=( Ipsum)@=( Ipsum)@=( Ipsum)@=( Ipsum)@=", + ) + } + + @Test + fun `test positive lookahead should fail 1`() { + assertFailure( + "Lorem Ipsum", + "Lorem\\( Lorem\\)\\@=" + ) + } + + @Test + fun `test positive lookahead should fail 2`() { + assertFailure( + "Lorem Ipsum Lorem", + "Lorem\\( Ipsum\\)\\@= Lorem" + ) + } + + @Test + fun `test negative lookahead 1`() { + doTest( + "${START}Lorem$END Ipsum", + "Lorem\\( Lorem\\)\\@!", + ) + } + + @Test + fun `test negative lookahead 2`() { + doTest( + 
"${START}Lorem Ipsum$END", + "Lorem\\( Lorem\\)\\@! Ipsum", + ) + } + + @Test + fun `test negative lookahead 3`() { + doTest( + "${START}Lorem$END Ipsum", + "\\vLorem( Lorem)@!( Lorem)@!( Lorem)@!( Lorem)@!( Lorem)@!", + ) + } + + @Test + fun `test negative lookahead should fail 1`() { + assertFailure( + "Lorem Ipsum", + "Lorem\\( Ipsum\\)\\@!" + ) + } + + @Test + fun `test negative lookahead should fail 2`() { + assertFailure( + "Lorem Ipsum", + "\\vLorem( Lorem)@!( Lorem)@!( Lorem)@!( Ipsum)@!( Lorem)@!" + ) + } + + @Test + fun `test double negative lookahead equals a positive`() { + doTest( + "${START}Lorem$END Ipsum", + "\\vLorem(( Ipsum)@!)@!", + ) + } + + @Test + fun `test double positive lookahead equals a positive`() { + doTest( + "${START}Lorem$END Ipsum", + "\\vLorem(( Ipsum)@=)@=", + ) + } + + @Test + fun `test positive and negative lookahead equals a negative`() { + doTest( + "${START}Lorem$END Ipsum", + "\\vLorem(( Lorem)@=)@!", + ) + } + + @Test + fun `test negative and positive lookahead equals a negative`() { + doTest( + "${START}Lorem$END Ipsum", + "\\vLorem(( Lorem)@!)@=", + ) + } + + @Test + fun `test negative and positive lookahead equals a negative and fails`() { + assertFailure( + "Lorem Ipsum", + "\\vLorem(( Ipsum)@!)@=" + ) + } + + @Test + fun `test positive lookahead with nested capturing groups`() { + doTest( + "${START}Lorem$END Ipsum", + "\\v(Lorem( Ipsum)@=)", + ) + } + + @Test + fun `test positive lookahead with multiple conditions`() { + doTest( + "${START}Lorem Ipsum$END", + "\\vLorem( Ipsum)@=( XYZ| Ipsum)", + ) + } + + @Test + fun `test negative lookahead with nested capturing groups`() { + doTest( + "${START}Lorem$END Ipsum", + "\\v(Lorem( XYZ)@!)", + ) + } + + @Test + fun `test negative lookahead with multiple conditions`() { + doTest( + "${START}Lorem Ipsum$END", + "\\vLorem( XYZ)@!( XYZ| Ipsum)", + ) + } + + @Test + fun `test combination of positive and negative lookahead`() { + doTest( + "${START}Lorem Ipsum$END", + "\\vLorem( 
Ipsum)@=( Ipsum( Lorem)@!)", + ) + } + + @Test + fun `test AND operator 1`() { + doTest( + "${START}Lorem$END Ipsum", + "Lorem Ipsum\\&.....", + ) + } + + @Test + fun `test AND operator 2`() { + doTest( + "${START}Lorem Ipsum$END", + ".*Ip\\&.*sum", + ) + } + + @Test + fun `test multiple AND operators`() { + doTest( + "${START}Lorem$END Ipsum", + ".*Ip\\&.*sum\\&Lorem Ipsum\\&Lorem", + ) + } + + @Test + fun `test AND operator inside group followed by word`() { + doTest( + "${START}Lorem Ipsum$END", + "\\v(Lorem&.*) Ipsum", + ) + } + + @Test + fun `test AND operator inside group correct capture`() { + doTest( + "${START}Lorem$END Ipsum", + "\\v(Lorem&.*) Ipsum", + groupNumber = 1 + ) + } + + @Test + fun `test AND operator should fail`() { + assertFailure( + "Lorem Ipsum", + "Ipsum\\&Lorem" + ) + } + + @Test + fun `test positive lookbehind 1`() { + doTest( + "Lorem ${START}Ipsum$END", + "\\v(Lorem )@<=Ipsum", + ) + } + + @Test + fun `test positive lookbehind 2`() { + doTest( + "Lor${START}e${END}m Ipsum", + "\\v(\\w{3})@<=\\w", + ) + } + + @Test + fun `test positive lookbehind 3`() { + doTest( + "Lorem ${START}Ipsum$END", + "\\v(\\s+)@<=\\w+", + ) + } + + @Test + fun `test positive lookbehind should fail 1`() { + assertFailure( + "Lorem Ipsum", + "\\v(Lorem)@<=Lorem" + ) + } + + @Test + fun `test positive lookbehind should fail 2`() { + assertFailure( + "Lorem Ipsum", + "\\v(Lorem )@<=(Lorem )@<=(Ipsum )@<=(Lorem )@<=Ipsum" + ) + } + + @Test + fun `test negative lookbehind 1`() { + doTest( + "Lorem ${START}Ipsum$END", + "\\v(Ipsum)@2lLorem", + ) + } + + @Test + fun `test match character at column 11`() { + doTest( + """ + |Lorem Ipsu${START}m$END + | + |Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. 
+ """.trimMargin(), + "\\%11cm", + ) + } + + @Test + fun `test match characters before column 11`() { + doTest( + """ + |Lore${START}m$END Ipsum + | + |Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "\\%<11cm" + ) + } + + @Test + fun `test match characters after column 6`() { + doTest( + """ + |Lorem Ipsu${START}m$END + | + |Lorem ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "\\%>6cm" + ) + } + + @Test + fun `test match characters at cursor line`() { + doTest( + """ + |Lorem Ipsum + | + |${START}Lor${END}em${CARET} ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "\\%.l...", + ) + } + + @Test + fun `test match characters before cursor line`() { + doTest( + """ + |${START}Lor${END}em Ipsum + | + |Lorem${CARET} ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "\\%<.l..." + ) + } + + @Test + fun `test match characters after cursor line`() { + doTest( + """ + |Lorem Ipsum + | + |Lorem${CARET} ipsum dolor sit amet, + |${START}con${END}sectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "\\%>.l..." + ) + } + + @Test + fun `test match characters at cursor column`() { + doTest( + """ + |Lorem$START Ip${END}sum + | + |Lorem${CARET} ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "\\%.c..." + ) + } + + @Test + fun `test match characters before cursor column`() { + doTest( + """ + |${START}Lor${END}em Ipsum + | + |Lorem${CARET} ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. 
+ """.trimMargin(), + "\\%<.c..." + ) + } + + @Test + fun `test match characters after cursor column`() { + doTest( + """ + |Lorem ${START}Ips${END}um + | + |Lorem${CARET} ipsum dolor sit amet, + |consectetur adipiscing elit + |Sed in orci mauris. + |Cras id tellus in ex imperdiet egestas. + """.trimMargin(), + "\\%>.c..." + ) + } + + @Test + fun `test optionally matched characters 1`() { + doTest( + "${START}substitute${END}", + "s\\%[ubstitute]" + ) + } + + @Test + fun `test optionally matched characters 2`() { + doTest( + "${START}sub${END}", + "s\\%[ubstitute]" + ) + } + + @Test + fun `test optionally matched characters 3`() { + doTest( + "${START}s${END}", + "s\\%[ubstitute]" + ) + } + + @Test + fun `test optionally matched sequence with collection 1`() { + doTest( + "${START}read${END}", + "r\\%[[eo]ad]" + ) + } + + @Test + fun `test optionally matched sequence with collection 2`() { + doTest( + "${START}road${END}", + "r\\%[[eo]ad]" + ) + } + + @Test + fun `test optionally matched sequence with collection 3`() { + doTest( + "${START}rea${END}", + "r\\%[[eo]ad]" + ) + } + + @Test + fun `test optionally matched sequence with collection 4`() { + doTest( + "${START}roa${END}", + "r\\%[[eo]ad]" + ) + } + + @Test + fun `test optionally matched sequence with escaped brackets 1`() { + doTest( + "${START}index[0]${END}", + "index\\%[\\[0\\]]" + ) + } + + @Test + fun `test optionally matched sequence with escaped brackets 2`() { + doTest( + "${START}index[${END}", + "index\\%[\\[0\\]]" + ) + } + + @Test + fun `test optionally matched sequence with group 1`() { + doTest( + "${START}function${END}", + "\\vf%[(un)ction]" + ) + } + + @Test + fun `test optionally matched sequence with group 2`() { + doTest( + "${START}f${END}u", + "\\vf%[(un)ction]" + ) + } + + @Test + fun `test mark does not exist`() { + assertFailure( + "Lorem ${MARK('m')}Ipsum", + "\\%'n..." 
+ ) + } + + @Test + fun `test pattern with multiple cursors at different indexes fails`() { + assertFailure( + "${CARET}Lorem ${CARET}Ipsum", + "\\%#.\\+\\%#" + ) + } + + @Test + fun `test pattern with single dollar sign`() { + doTest( + "Lorem\$Ipsum${START}${END}", + "$" + ) + } + + @Test + fun `test pattern with single caret symbol`() { + doTest( + "${START}${END}Lorem^Ipsum", + "^" + ) + } + + companion object { + private fun assertFailure( + text: CharSequence, + pattern: String, + offset: Int = 0, + ignoreCase: Boolean = false + ) { + val editor = mockEditorFromText(text) + val nfa = buildNFA(pattern) + assertTrue(VimRegexEngine.simulate(nfa, editor, offset, ignoreCase) is VimMatchResult.Failure) + } + + private fun doTest( + text: CharSequence, + pattern: String, + offset: Int = 0, + ignoreCase: Boolean = false, + groupNumber: Int = 0 + ) { + val editor = mockEditorFromText(text) + val nfa = buildNFA(pattern) + + val result = VimRegexEngine.simulate(nfa, editor, offset, ignoreCase) + when (result) { + is VimMatchResult.Success -> assertEquals(getMatchRanges(text).firstOrNull(), result.groups.get(groupNumber)?.range) + else -> fail("Expected to find match") + } + } + + private fun doTest( + text: CharSequence, + pattern: String, + carets: List, + offset: Int = 0, + ignoreCase: Boolean = false, + groupNumber: Int = 0 + ) { + val editor = mockEditor(text, carets) + val nfa = buildNFA(pattern) + + val result = VimRegexEngine.simulate(nfa, editor, offset, ignoreCase) + when (result) { + is VimMatchResult.Success -> assertEquals(getMatchRanges(text).firstOrNull(), result.groups.get(groupNumber)?.range) + else -> fail("Expected to find match") + } + } + + private fun buildNFA(pattern: String) : NFA { + val parserResult = VimRegexParser.parse(pattern) + return when (parserResult) { + is VimRegexParserResult.Failure -> fail("Parsing failed") + is VimRegexParserResult.Success -> 
NFA.fromMatcher(DotMatcher(true)).closure(false).concatenate(PatternVisitor.visit(parserResult.tree)) + } + } + } +} \ No newline at end of file diff --git a/vim-engine/src/test/kotlin/com/maddyhome/idea/vim/regexp/internal/VimRegexParserTest.kt b/vim-engine/src/test/kotlin/com/maddyhome/idea/vim/regexp/internal/VimRegexParserTest.kt new file mode 100644 index 0000000000..49bda1f53d --- /dev/null +++ b/vim-engine/src/test/kotlin/com/maddyhome/idea/vim/regexp/internal/VimRegexParserTest.kt @@ -0,0 +1,340 @@ +/* + * Copyright 2003-2023 The IdeaVim authors + * + * Use of this source code is governed by an MIT-style + * license that can be found in the LICENSE.txt file or at + * https://opensource.org/licenses/MIT. + */ + +package com.maddyhome.idea.vim.regexp.internal + +import com.maddyhome.idea.vim.regexp.parser.VimRegexParser +import com.maddyhome.idea.vim.regexp.parser.VimRegexParserResult +import org.junit.jupiter.api.Test +import kotlin.test.fail + + +class VimRegexParserTest { + @Test + fun `test range both bounds`() { + assertSuccess("a\\{2,5}") + } + + @Test + fun `test range left bound`() { + assertSuccess("a\\{6,}") + } + + @Test + fun `test range right bound`() { + assertSuccess("a\\{,10}") + } + + @Test + fun `test range absolute bound`() { + assertSuccess("a\\{5}") + } + + @Test + fun `test range lazy`() { + assertSuccess("a\\{-,5}") + } + + @Test + fun `test range missing right bracket`() { + assertFailure("a\\{5") + } + + @Test + fun `test range two commas`() { + assertFailure("a\\{2,5,}") + } + + @Test + fun `test range non integer bound`() { + assertFailure("a\\{2,g}") + } + + @Test + fun `test range lazy with extra dash`() { + assertFailure("a\\{--2,5}") + } + + @Test + fun `test collection a to z`() { + assertSuccess("[a-z]") + } + + @Test + fun `test collection 0 to 9`() { + assertSuccess("[0-9]") + } + @Test + fun `test collection single element`() { + assertSuccess("[f]") + } + + @Test + fun `test collection a to z and A`() { + 
assertSuccess("[a-zA]") + } + + @Test + fun `test collection a to z and A to Z`() { + assertSuccess("[a-zA-Z]") + } + + @Test + fun `test collection a to z, 9 and A to Z`() { + assertSuccess("[a-z9A-Z]") + } + + @Test + fun `test collection a to z, dash and Z`() { + /** + * This pattern looks like it should + * be illegal, but Vim allows it and + * matches the characters 'a' to 'z', a + * literal dash '-' and a 'Z' + */ + assertSuccess("[a-z-Z]") + } + + @Test + fun `test collection with single dash`() { + assertSuccess("[-]") + } + + @Test + fun `test collection dash to 0`() { + assertSuccess("[--0]") + } + + @Test + fun `test collection literal dash and a to z`() { + assertSuccess("[-a-z]") + } + + @Test + fun `test collection a to z and literal dash`() { + assertSuccess("[a-z-]") + } + + @Test + fun `test collection a, literal dash and b`() { + assertSuccess("[a\\-b]") + } + + @Test + fun `test collection escaped backslash`() { + assertSuccess("[\\\\]") + } + + @Test + fun `test collection a to z negated`() { + assertSuccess("[^a-z]") + } + + @Test + fun `test collection with negated unescaped caret`() { + /** + * Matches everything except "^". + * It's more correct to write it as + * "[^\^]", escaping the "^", but "^" + * is still allowed to be unescaped and + * taken literally when not immediately + * after the "[" + */ + assertSuccess("[^^]") + } + + @Test + fun `test collection with escaped caret`() { + assertSuccess("[\\^]") + } + + @Test + fun `test collection unescaped backslash not at end`() { + /** + * Matches a "\" or "a". + * Since "\a" isn't an escape sequence, + * the "\" is taken literally. 
+ * Equivalent to "[\\a]" + */ + assertSuccess("[\\a]") + } + + @Test + fun `test collection unicode code range`() { + assertSuccess("[\\u0-\\uFFFF]") + } + + @Test + fun `test collection russian alphabet`() { + assertSuccess("[А-яЁё]") + } + + @Test + fun `test unclosed collection`() { + assertFailure("[a-z") + } + + @Test + fun `test collection unescaped backslash at end`() { + assertFailure("[abc\\]") + } + + @Test + fun `test collection with character class expression`() { + assertSuccess("[[:alpha:]]") + } + + @Test + fun `test collection with invalid character class expression`() { + /** + * Although "[:invalid:]" doesn't correspond to any character + * class expression, this pattern is still valid and means: + * Any of these characters: '[' ':' 'i' 'n' 'v' 'a' 'l' 'd', + * followed by a ']' + */ + assertSuccess("[[:invalid:]]") + } + + @Test + fun `test collection with character class expression missing closing bracket`() { + assertFailure("[[:alnum:]") + } + + @Test + fun `test collection with character class expression and other elements`() { + assertSuccess("[a-z[:digit:]-Z]") + } + + @Test + fun `test opening bracket followed by collection`() { + assertSuccess("\\[[a-z]") + } + + @Test + fun `test collection with opening bracket`() { + assertSuccess("[[a-z]") + } + + @Test + fun `test unicode character`() { + assertSuccess("\u03b5") + } + + @Test + fun `test unicode character in nomagic mode`() { + assertSuccess("\\M\u03b5") + } + + @Test + fun `test wider unicode character`() { + assertSuccess("\uD83E\uDE24") + } + + @Test + fun `test 'ab'`() { + assertSuccess("ab") + } + + @Test + fun `test 'ab' after cursor`() { + assertSuccess("\\%#ab") + } + + @Test + fun `test sequence of 0 or more 'ab'`() { + assertSuccess("\\(ab\\)*") + } + + @Test + fun `test sequence of 0 or more 'ab' no magic`() { + assertSuccess("\\M\\(ab\\)\\*") + } + + @Test + fun `test sequence of 1 or more 'ab'`() { + assertSuccess("\\(ab\\)\\+") + } + + @Test + fun `test 0 or 1 'ab' with 
equals`() { + assertSuccess("\\(ab\\)\\=") + } + + @Test + fun `test 0 or 1 'ab' with question mark`() { + assertSuccess("\\(ab\\)\\?") + } + + @Test + fun `test nested groups with multi`() { + assertSuccess("\\(\\(a\\)*b\\)\\+") + } + + @Test + fun `test non-capture group`() { + assertSuccess("\\%(a\\)") + } + + @Test + fun `test very nomagic characters`() { + assertSuccess("\\V%(") + } + + @Test + fun `test date format`() { + assertSuccess("\\(\\d\\{2}\\)\\{2}\\d\\{4}") + } + + @Test + fun `test switching to nomagic`() { + assertSuccess("a*\\Ma*") + } + + @Test + fun `test switching to all magic modes`() { + assertSuccess("\\m.*\\M\\.\\*\\v.*\\V\\.\\*") + } + + @Test + fun `test backreference to group 1`() { + assertSuccess("\\v(cat|dog)\\1") + } + + @Test + fun `test unclosed group`() { + assertFailure("\\(ab") + } + + @Test + fun `test unmatched closing )`() { + assertFailure("ab\\)") + } + + @Test + fun `test unclosed non-capture group`() { + assertFailure("\\%(a") + } + + @Test + fun `test unescaped group close`() { + assertFailure("\\(a)") + } + + private fun assertSuccess(pattern: String) { + val result = VimRegexParser.parse(pattern) + if (result is VimRegexParserResult.Failure) { + fail("Expecting successful parsing for pattern $pattern but got ${result.errorCode}") + } + } + + private fun assertFailure(pattern: String) { + if (VimRegexParser.parse(pattern) is VimRegexParserResult.Success) { + fail("Expecting unsuccessful parsing for pattern $pattern") + } + } +}