Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport Zig lexer from Notepad4 #267

Closed
wants to merge 11 commits into from
Closed

Conversation

techee
Copy link
Contributor

@techee techee commented Aug 22, 2024

This patch adds the Zig lexer from the Notepad4 editor originally created by Zufu Liu (@zufuliu).

Some changes have been made to make it compile and work in Scintilla since Notepad4 contains a modified Scintilla version. Also, some features of the Notepad4 lexer have been removed/changed:

  • special highlighting of formatting strings has been removed as it uses some extra functions added to Notepad4's Scintilla and I didn't want to spend much time on figuring out what would have to be back-ported - other lexilla lexers don't do this either anyway
  • folding has been modified to use simple folding like the rest of Scintilla lexers and not folding of the previous line based on brace presence on the next line like Notepad4
  • "semi-syntactic" coloring of Notepad4 which colors functions following "fn" has been removed as this is not performed in other Scintilla lexers
  • highlighting of tasks such as TODOs in comments has been removed as it isn't present in other Scintilla lexers
  • coloring states and keywords have been renamed slightly - keywords at index 3 have been reserved for caller-supplied type names using SCI_SETKEYWORDS like in the C lexer (which we use in Geany to supply type names obtained using ctags)

Full diff is here:

Full diff
--- notepad4/scintilla/lexers/LexZig.cxx	2024-08-21 11:44:51.924318156 +0200
+++ lexilla/lexers/LexZig.cxx	2024-08-22 20:26:47.832211501 +0200
@@ -1,10 +1,15 @@
-// This file is part of Notepad4.
-// See License.txt for details about distribution and modification.
-//! Lexer for Zig
+// Scintilla source code edit control
+/** @file LexZig.cxx
+ ** Lexer for Zig language.
+ **/
+// Based on Zufu Liu's Notepad4 Zig lexer
+// Modified for Scintilla by Jiri Techet, 2024
+// The License.txt file describes the conditions under which this software may be distributed.
 
 #include <cassert>
 #include <cstring>
 
+#include <algorithm>
 #include <string>
 #include <string_view>
 
@@ -17,12 +22,46 @@
 #include "Accessor.h"
 #include "StyleContext.h"
 #include "CharacterSet.h"
-#include "StringUtils.h"
 #include "LexerModule.h"
 
 using namespace Lexilla;
 
 namespace {
+// Use an unnamed namespace to protect the functions and classes from name conflicts
+
+constexpr bool IsAGraphic(int ch) noexcept {
+	// excludes C0 control characters and whitespace
+	return ch > 32 && ch < 127;
+}
+
+constexpr bool IsIdentifierStart(int ch) noexcept {
+	return IsUpperOrLowerCase(ch) || ch == '_';
+}
+
+constexpr bool IsIdentifierStartEx(int ch) noexcept {
+	return IsIdentifierStart(ch) || ch >= 0x80;
+}
+
+constexpr bool IsNumberStart(int ch, int chNext) noexcept {
+	return IsADigit(ch) || (ch == '.' && IsADigit(chNext));
+}
+
+constexpr bool IsIdentifierChar(int ch) noexcept {
+	return IsAlphaNumeric(ch) || ch == '_';
+}
+
+constexpr bool IsNumberContinue(int chPrev, int ch, int chNext) noexcept {
+	return ((ch == '+' || ch == '-') && (chPrev == 'e' || chPrev == 'E'))
+		|| (ch == '.' && chNext != '.');
+}
+
+constexpr bool IsDecimalNumber(int chPrev, int ch, int chNext) noexcept {
+	return IsIdentifierChar(ch) || IsNumberContinue(chPrev, ch, chNext);
+}
+
+constexpr bool IsIdentifierCharEx(int ch) noexcept {
+	return IsIdentifierChar(ch) || ch >= 0x80;
+}
 
 // https://ziglang.org/documentation/master/#Escape-Sequences
 struct EscapeSequence {
@@ -48,113 +87,33 @@
 	}
 	bool atEscapeEnd(int ch) noexcept {
 		--digitsLeft;
-		return digitsLeft <= 0 || !IsHexDigit(ch);
+		return digitsLeft <= 0 || !IsAHeXDigit(ch);
 	}
 };
 
-// https://ziglang.org/documentation/master/std/#std.fmt.format
-enum class FormatArgument {
-	None,
-	Digit,
-	Identifier,
-	Error,
-};
-
-constexpr bool IsBraceFormatSpecifier(int ch) noexcept {
-	return AnyOf(ch, 'b',
-					'c',
-					'd',
-					'e',
-					'f',
-					'g',
-					'o',
-					's',
-					'u',
-					'x', 'X',
-					'?', '!', '*', 'a');
-}
-
-constexpr bool IsBraceFormatNext(int ch) noexcept {
-	return ch == '}' || IsADigit(ch) || ch == '[' || ch == ':' || ch == '.'
-		|| IsBraceFormatSpecifier(ch);
-}
-
-constexpr bool IsFormatArgument(int ch, FormatArgument fmtArgument) noexcept {
-	return IsADigit(ch) || (fmtArgument == FormatArgument::Identifier && IsIdentifierCharEx(ch));
-}
-
-inline Sci_Position CheckBraceFormatSpecifier(const StyleContext &sc, LexAccessor &styler) noexcept {
-	Sci_PositionU pos = sc.currentPos;
-	char ch = static_cast<char>(sc.ch);
-	// [specifier]
-	if (IsBraceFormatSpecifier(sc.ch)) {
-		++pos;
-		if (sc.Match('a', 'n', 'y')) {
-			pos += 2;
-		}
-		ch = styler[pos];
-		if (!AnyOf(ch, ':', '.', '}', '<', '>', '^')) {
-			return 0;
-		}
-	}
-	if (ch == ':') {
-		ch = styler[++pos];
-	}
-	// [[fill] alignment]
-	if (!AnyOf(ch, '\r', '\n', '{', '}')) {
-		Sci_Position width = 1;
-		if (ch & 0x80) {
-			styler.GetCharacterAndWidth(pos, &width);
-		}
-		const char chNext = styler[pos + width];
-		if (AnyOf(ch, '<', '>', '^') || AnyOf(chNext, '<', '>', '^')) {
-			pos += 1 + width;
-			ch = styler[pos];
-		}
-	}
-	// [width]
-	while (IsADigit(ch)) {
-		ch = styler[++pos];
-	}
-	// [.precision]
-	if (ch == '.') {
-		ch = styler[++pos];
-		while (IsADigit(ch)) {
-			ch = styler[++pos];
-		}
-	}
-	if (ch == '}') {
-		return pos - sc.currentPos;
-	}
-	return 0;
-}
-
 enum {
 	ZigLineStateMaskLineComment = 1, // line comment
 	ZigLineStateMaskMultilineString = 1 << 1, // multiline string
 };
 
-//KeywordIndex++Autogenerated -- start of section automatically generated
 enum {
-	KeywordIndex_Keyword = 0,
-	KeywordIndex_Type = 1,
+	KeywordIndex_Primary = 0,
+	KeywordIndex_Secondary = 1,
+	KeywordIndex_Tertiary = 2,
+	KeywordIndex_Type = 3,
 };
-//KeywordIndex--Autogenerated -- end of section automatically generated
 
-enum class KeywordType {
-	None = SCE_ZIG_DEFAULT,
-	Function = SCE_ZIG_FUNCTION_DEFINITION,
+const char *const zigWordListDesc[] = {
+	"Primary keywords",
+	"Secondary keywords",
+	"Tertiary keywords",
+	"Global type definitions",
+	nullptr
 };
 
-constexpr bool IsSpaceEquiv(int state) noexcept {
-	return state <= SCE_ZIG_TASKMARKER;
-}
-
-void ColouriseZigDoc(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, LexerWordList keywordLists, Accessor &styler) {
-	KeywordType kwType = KeywordType::None;
+void ColouriseZigDoc(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, WordList *keywordLists[], Accessor &styler) {
 	int visibleChars = 0;
 	int lineState = 0;
-	FormatArgument fmtArgument = FormatArgument::None;
 	EscapeSequence escSeq;
 
 	StyleContext sc(startPos, lengthDoc, initStyle, styler);
@@ -177,23 +136,16 @@
 				if (sc.state == SCE_ZIG_IDENTIFIER) {
 					char s[128];
 					sc.GetCurrent(s, sizeof(s));
-					if (keywordLists[KeywordIndex_Keyword].InList(s)) {
-						sc.ChangeState(SCE_ZIG_WORD);
-						kwType = KeywordType::None;
-						if (StrEqual(s, "fn")) {
-							kwType = KeywordType::Function;
-						}
-					} else if (keywordLists[KeywordIndex_Type].InList(s)) {
-						sc.ChangeState(SCE_ZIG_TYPE);
-					} else if (kwType != KeywordType::None) {
-						sc.ChangeState(static_cast<int>(kwType));
-					} else if (sc.GetLineNextChar() == '(') {
-						sc.ChangeState(SCE_ZIG_FUNCTION);
+					if (keywordLists[KeywordIndex_Primary]->InList(s)) {
+						sc.ChangeState(SCE_ZIG_KW_PRIMARY);
+					} else if (keywordLists[KeywordIndex_Secondary]->InList(s)) {
+						sc.ChangeState(SCE_ZIG_KW_SECONDARY);
+					} else if (keywordLists[KeywordIndex_Tertiary]->InList(s)) {
+						sc.ChangeState(SCE_ZIG_KW_TERTIARY);
+					} else if (keywordLists[KeywordIndex_Type]->InList(s)) {
+						sc.ChangeState(SCE_ZIG_KW_TYPE);
 					}
 				}
-				if (sc.state != SCE_ZIG_WORD) {
-					kwType = KeywordType::None;
-				}
 				sc.SetState(SCE_ZIG_DEFAULT);
 			}
 			break;
@@ -219,18 +171,6 @@
 						escSeq.resetEscapeState(sc.state);
 						sc.SetState(SCE_ZIG_ESCAPECHAR);
 						sc.Forward();
-                    } else if (sc.ch == '{' && IsBraceFormatNext(sc.chNext)) {
-                    	escSeq.outerState = sc.state;
-                    	sc.SetState(SCE_ZIG_PLACEHOLDER);
-                    	fmtArgument = FormatArgument::None;
-                    	if (IsADigit(sc.chNext)) {
-                    		fmtArgument = FormatArgument::Digit;
-                    	} else if (sc.chNext == '[') {
-                    		fmtArgument = FormatArgument::Identifier;
-							if (IsIdentifierStartEx(sc.GetRelative(2))) {
-								sc.Forward();
-							}
-                    	}
                     }
                 }
 			}
@@ -246,34 +186,6 @@
 			}
 			break;
 
-		case SCE_ZIG_PLACEHOLDER:
-			if (!IsFormatArgument(sc.ch, fmtArgument)) {
-				if (fmtArgument == FormatArgument::Identifier) {
-					if (sc.ch == ']') {
-						sc.Forward();
-					} else {
-						fmtArgument = FormatArgument::Error;
-					}
-				}
-				if (fmtArgument != FormatArgument::Error) {
-					const Sci_Position length = CheckBraceFormatSpecifier(sc, styler);
-					if (length != 0) {
-						sc.SetState(SCE_ZIG_FORMAT_SPECIFIER);
-						sc.Advance(length);
-						sc.SetState(SCE_ZIG_PLACEHOLDER);
-						sc.ForwardSetState(escSeq.outerState);
-						continue;
-					}
-				}
-				if (fmtArgument == FormatArgument::Error || sc.ch != '}') {
-					sc.Rewind();
-					sc.ChangeState(escSeq.outerState);
-				}
-				sc.ForwardSetState(escSeq.outerState);
-				continue;
-			}
-			break;
-
 		case SCE_ZIG_COMMENTLINE:
 		case SCE_ZIG_COMMENTLINEDOC:
 			if (sc.atLineStart) {
@@ -314,7 +226,6 @@
 		if (sc.atLineEnd) {
 			styler.SetLineState(sc.currentLine, lineState);
 			lineState = 0;
-			kwType = KeywordType::None;
 			visibleChars = 0;
 		}
 		sc.Forward();
@@ -332,25 +243,20 @@
 	}
 };
 
-void FoldZigDoc(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, LexerWordList /*keywordLists*/, Accessor &styler) {
+void FoldZigDoc(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, WordList *[] /*keywordLists*/, Accessor &styler) {
 	const Sci_PositionU endPos = startPos + lengthDoc;
-	Sci_Line lineCurrent = styler.GetLine(startPos);
+	Sci_Position lineCurrent = styler.GetLine(startPos);
 	FoldLineState foldPrev(0);
 	int levelCurrent = SC_FOLDLEVELBASE;
 	if (lineCurrent > 0) {
 		levelCurrent = styler.LevelAt(lineCurrent - 1) >> 16;
 		foldPrev = FoldLineState(styler.GetLineState(lineCurrent - 1));
-		const Sci_PositionU bracePos = CheckBraceOnNextLine(styler, lineCurrent - 1, SCE_ZIG_OPERATOR, SCE_ZIG_TASKMARKER);
-		if (bracePos) {
-			startPos = bracePos + 1; // skip the brace
-		}
 	}
 
 	int levelNext = levelCurrent;
 	FoldLineState foldCurrent(styler.GetLineState(lineCurrent));
 	Sci_PositionU lineStartNext = styler.LineStart(lineCurrent + 1);
-	lineStartNext = sci::min(lineStartNext, endPos);
-	int visibleChars = 0;
+	lineStartNext = std::min(lineStartNext, endPos);
 
 	while (startPos < endPos) {
 		initStyle = styler.StyleAt(startPos);
@@ -364,23 +270,14 @@
 			}
 		}
 
-		if (visibleChars == 0 && !IsSpaceEquiv(initStyle)) {
-			++visibleChars;
-		}
 		++startPos;
 		if (startPos == lineStartNext) {
 			const FoldLineState foldNext(styler.GetLineState(lineCurrent + 1));
-			levelNext = sci::max(levelNext, SC_FOLDLEVELBASE);
+			levelNext = std::max(levelNext, SC_FOLDLEVELBASE);
 			if (foldCurrent.lineComment) {
 				levelNext += foldNext.lineComment - foldPrev.lineComment;
 			} else if (foldCurrent.multilineString) {
 				levelNext += foldNext.multilineString - foldPrev.multilineString;
-			} else if (visibleChars) {
-				const Sci_PositionU bracePos = CheckBraceOnNextLine(styler, lineCurrent, SCE_ZIG_OPERATOR, SCE_ZIG_TASKMARKER);
-				if (bracePos) {
-					levelNext++;
-					startPos = bracePos + 1; // skip the brace
-				}
 			}
 
 			const int levelUse = levelCurrent;
@@ -392,15 +289,14 @@
 
 			lineCurrent++;
 			lineStartNext = styler.LineStart(lineCurrent + 1);
-			lineStartNext = sci::min(lineStartNext, endPos);
+			lineStartNext = std::min(lineStartNext, endPos);
 			levelCurrent = levelNext;
 			foldPrev = foldCurrent;
 			foldCurrent = foldNext;
-			visibleChars = 0;
 		}
 	}
 }
 
-}
+}  // unnamed namespace end
 
-extern const LexerModule lmZig(SCLEX_ZIG, ColouriseZigDoc, "zig", FoldZigDoc);
+extern const LexerModule lmZig(SCLEX_ZIG, ColouriseZigDoc, "zig", FoldZigDoc, zigWordListDesc);

Note: Fear not, this is the last lexer I'm planning to steal from Notepad4 :-).

Fixes #237.

@techee
Copy link
Contributor Author

techee commented Aug 22, 2024

I was hesitating a bit over whether Zig requires a special lexer and whether it wouldn't be enough to use e.g. the C lexer for it, but there are the @-introduced builtin functions and the rather special multiline strings starting with \\, so in the end I think a new lexer is justified.

@nyamatongwe nyamatongwe added the zig Caused by the Zig lexer label Aug 22, 2024
@techee
Copy link
Contributor Author

techee commented Aug 23, 2024

"semi-syntactic" coloring of Notepad4 which colors functions following "fn" has been removed as this is not performed in other Scintilla lexers

Should be no problem to add back if this is the preferred behavior for Scintilla.

@techee techee mentioned this pull request Aug 23, 2024
9 tasks
@nyamatongwe
Copy link
Member

Zig documentation emphasizes that Doc Comments and Top Level Doc Comments are different so I expect users would want these differentiated.

// Comments in Zig start with "//" and end at the next LF byte (end of line).

/// A structure for storing a timestamp, with nanosecond precision (this is a
/// multiline doc comment).

//! This module provides functions for retrieving the current date and
//! time with varying degrees of precision and accuracy.

@nyamatongwe
Copy link
Member

This character literal escape appears to lex incorrectly with the '9}' not in escape style. There doesn't appear to be a limit on number of digits for a {...} type of escape.

'\u{1f4a9}'

\u{NNNNNN} | hexadecimal Unicode scalar value UTF-8 encoded (1 or more digits)

@zufuliu
Copy link
Contributor

zufuliu commented Aug 24, 2024

This character literal escape appears to lex incorrectly

Setting escSeq.digitsLeft = 9; (or a larger value) after escSeq.brace = true; can fix this. Any number of leading zeros is allowed, but 8 hex digits (the documentation uses 6) should be good enough for a UTF-32 hex value. A more complex fix may look like the following:

bool atEscapeEnd(int ch) noexcept {
	--digitsLeft;
	return (!brace && digitsLeft <= 0) || !IsHexDigit(ch);
}

@techee
Copy link
Contributor Author

techee commented Aug 26, 2024

"semi-syntactic" coloring of Notepad4 which colors functions following "fn" has been removed as this is not performed in other Scintilla lexers

Should be no problem to add back if this is the preferred behavior for Scintilla.

I added it back - unlike Dart, detection of function definitions is trivial (they always follow the fn keyword) and also the Zig documentation shows function names highlighted so it's probably the right thing to do.

Zig documentation emphasizes that Doc Comments and Top Level Doc Comments are different so I expect users would want these differentiated.

Done (based on @zufuliu's code).

This character literal escape appears to lex incorrectly with the '9}' not in escape style. There doesn't appear to be a limit on number of digits for a {...} type of escape.

Done. I used the lazy version escSeq.digitsLeft = 9 so up to 8 hex digits are possible.

@techee
Copy link
Contributor Author

techee commented Aug 26, 2024

Assuming that the Dart lexer gets merged first, I'll wait until it's merged and rebase this branch on top of it to resolve the conflicts.

@techee
Copy link
Contributor Author

techee commented Sep 3, 2024

Assuming that the Dart lexer gets merged first, I'll wait until it's merged and rebase this branch on top of it to resolve the conflicts.

Just to be sure we don't wait for each other here - I more or less assumed that

#265 (comment)

applies here too so I didn't perform the rebase. If something is needed from me for this PR, please let me know.

@techee
Copy link
Contributor Author

techee commented Sep 10, 2024

@nyamatongwe Should I convert this one to the object lexer too?

@nyamatongwe
Copy link
Member

It would be best if the Zig lexer is also an object lexer.

@techee
Copy link
Contributor Author

techee commented Sep 11, 2024

It would be best if the Zig lexer is also an object lexer.

OK, I'll have a look at it.

@techee
Copy link
Contributor Author

techee commented Sep 11, 2024

OK, I'll have a look at it.

Done, please let me know if some changes are needed.

I'm not sure whether

12, "SCE_ZIG_BUILTIN_FUNCTION", "identifier", "Builtin function",

is the correct tag or some different tag should be used (builtin functions start with @ which, strictly speaking, isn't an identifier character).

@nyamatongwe
Copy link
Member

Builtin functions are mostly similar to other languages standard library functions so should allow applications to provide similar behaviour by specifying identifier as primary tag.

The use of syntax to distinguish builtins is an example where concepts don't cleanly match between languages. There could be additional tags to refine identifier as there could be for similar situations such as Python which has distinct function and class identifier styles.

@nyamatongwe
Copy link
Member

Most of this is working well but the folding isn't stable: it changes based on the ranges that are lexed. Open AllStyles.zig in SciTE, press Ctrl+End to go to the end and the folding will be balanced. Go back to the top, close and reopen the file. Then scroll down line by line and some of the folding structure will be wrong - for me, it is commonly unbalanced starting from line 112 where the SCE_ZIG_MULTISTRING multiline string should have a start fold. If this element is lexed together (by pressing Page Down) another unbalanced fold occurs at line 204.

I may commit the lexer in this state, perhaps with folding turned off, to provide a basis for further work.

This set of style definitions helps see the code for testing but it's not good enough to distribute.

style.zig.0=fore:#000000
style.zig.1=fore:#008000,italics
style.zig.2=fore:#008000,back:#F0FFF0
style.zig.3=fore:#00B0B0
style.zig.4=fore:#004080,bold
style.zig.5=fore:#A0A000
style.zig.6=fore:#800080
style.zig.7=fore:#800080,back:#FFF0FF
style.zig.8=fore:#804000,back:#FFF0FF
style.zig.9=fore:#000000
style.zig.10=fore:#A000A0
style.zig.11=fore:#000089,bold
style.zig.12=fore:#B00060,bold
style.zig.13=fore:#0070B0,italics
style.zig.14=fore:#60A000
style.zig.15=fore:#0060A0
style.zig.16=fore:#A00060

@zufuliu
Copy link
Contributor

zufuliu commented Sep 12, 2024

Most of this is working well but the folding isn't stable: it changes based on the ranges that are lexed.

Likely the same bug as Bash comment line folding (issue #224); the Dart lexer has the same bug:
image

Backtracking one line, as the Bash lexer does, should fix the bug (backtracking is done inside LexerModule::Fold() for function lexers).

lexilla/lexers/LexBash.cxx

Lines 1195 to 1201 in 75ae907

Sci_Position lineCurrent = styler.GetLine(startPos);
// Backtrack to previous line in case need to fix its fold status
if (lineCurrent > 0) {
lineCurrent--;
startPos = styler.LineStart(lineCurrent);
initStyle = (startPos > 0) ? styler.StyleIndexAt(startPos - 1) : 0;
}

@techee
Copy link
Contributor Author

techee commented Sep 12, 2024

@zufuliu Thanks, it works!

(To be clear, I don't know anything about how folding works and did just Ctrl+C Ctrl+V here.)

Dart lexer has same bug:

I'll try to reproduce it and if the bug is present, I'll submit the same patch for the Dart lexer too.

@techee
Copy link
Contributor Author

techee commented Sep 12, 2024

Alright, I have one quite interesting observation - I first accidentally checked out the Dart lexer branch which wasn't converted to the object lexer and I couldn't reproduce the problem there. But once I realized it and switched to the branch using the object lexer, the problem appeared.

Is there some difference between start line numbers between object/non-object lexers that could cause this? Isn't this a bug somewhere in Scintilla/Lexilla?

@techee
Copy link
Contributor Author

techee commented Sep 12, 2024

I just added a printf() printing the line number just behind the initial

	Sci_Position lineCurrent = styler.GetLine(startPos);

in Lex() in both the non-object and object lexer. Then I started SciTE and moved the caret down the screen; the first movement down that caused the editor to scroll was:

  • line 46 for non-object lexer
  • line 47 for object lexer

So something somewhere must cause this difference by 1 which I guess is not intended.

@nyamatongwe
Copy link
Member

For historical reasons, the adapter code LexerModule::Fold that translates between the ILexer methods and function lexers moves back one line from the start position. This is less efficient as it's folding an extra line each time.
It does cover up some folding problems but not all. It is generally better to move back to the start of the multi-line construct.

@techee
Copy link
Contributor Author

techee commented Sep 13, 2024

For historical reasons, the adapter code LexerModule::Fold that translates between the ILexer methods and function lexers moves back one line from the start position. This is less efficient as its folding an extra line each time.
It does cover up some folding problems but not all. It is generally better to move back to the start of the multi-line construct.

Alright, so what should I do in the Zig and Dart case? Backtrack 1 line or to the beginning of the multiline string/comment?

nyamatongwe added a commit that referenced this pull request Sep 13, 2024
… document and

lexer object from successful whole document fold for the per-line fold.
Fixed by creating a new document and lexer for the per-line lexing and folding.
This showed there are problems with the current Dart, F#, and Julia folders as well as
the new Zig lexer in #267.
@nyamatongwe
Copy link
Member

what should I do in the Zig and Dart case? Backtrack 1 line or to the beginning of the multiline string/comment?

Start of string/comment.

This should have been caught by the lexer testing program TestLexers which runs the lexer and folder over the file twice: once with the whole file in one go and once line-by-line. However, the same document object (with styling, fold levels, and line state) was reused for both runs and the existing data meant the line-by-line run produced the same results. This is now fixed with 3ace72f which also shows problems with Dart (and Julia and F#). To ensure this doesn't cause failures in CI, these are turned off for now for Dart, F# and Julia by adding testlexers.per.line.disable=1 to their properties.

@techee
Copy link
Contributor Author

techee commented Sep 13, 2024

Start of string/comment.

For Dart I did this: #275

If it's considered OK, I'll do the same for Zig here (plus I'll rebase this PR on top of master to get the changes in the test suite).

@nyamatongwe
Copy link
Member

The Dart change is OK although it could be shortened with AnyOf.

This patch adds the Zig lexer from the Notepad4 editor originally
created by Zufu Liu (@zufuliu).

Some changes have been made to make it compile and work in Scintilla
since Notepad4 contains a modified Scintilla version.
Also, some features of the Notepad4 lexer have been removed/changed:

- special highlighting of formatting strings has been removed as
  it uses some extra functions added to Notepad4's Scintilla and I didn't
  want to spend much time on figuring out what would have to be
  back-ported - other lexilla lexers don't do this either anyway
- folding has been modified to use simple folding like the rest of
  Scintilla lexers and not folding of the previous line based on brace
  presence on the next line like Notepad4
- "semi-syntactic" coloring of Notepad4 which colors functions following
  "fn" has been removed as this is not performed in other Scintilla lexers
- highlighting of tasks such as TODOs in comments has been removed as it
  isn't present in other Scintilla lexers
- coloring states and keywords have been renamed slightly - keywords at
  index 3 have been reserved for caller-supplied type names using
  SCI_SETKEYWORDS like in the C lexer (which we use in Geany to supply
  type names obtained using ctags)

Fixes ScintillaOrg#237.
@techee
Copy link
Contributor Author

techee commented Sep 14, 2024

If it's considered OK, I'll do the same for Zig here (plus I'll rebase this PR on top of master to get the changes in the test suite).

Done.

The Dart change is OK although it could be shortened with AnyOf.

Done for Zig.

nyamatongwe pushed a commit that referenced this pull request Sep 14, 2024
This patch adds the Zig lexer from the Notepad4 editor originally
created by Zufu Liu (@zufuliu) and converts it to an object lexer.

Some changes have been made to make it compile and work in Scintilla
since Notepad4 contains a modified Scintilla version.
Also, some features of the Notepad4 lexer have been removed/changed:

- special highlighting of formatting strings has been removed as
  it uses some extra functions added to Notepad4's Scintilla and I didn't
  want to spend much time on figuring out what would have to be
  back-ported - other lexilla lexers don't do this either anyway
- folding has been modified to use simple folding like the rest of
  Scintilla lexers and not folding of the previous line based on brace
  presence on the next line like Notepad4
- "semi-syntactic" coloring of Notepad4 which colors functions following
  "fn" has been removed as this is not performed in other Scintilla lexers
- highlighting of tasks such as TODOs in comments has been removed as it
  isn't present in other Scintilla lexers
- coloring states and keywords have been renamed slightly - keywords at
  index 3 have been reserved for caller-supplied type names using
  SCI_SETKEYWORDS like in the C lexer (which we use in Geany to supply
  type names obtained using ctags)

  Add SCE_ZIG_COMMENTLINETOP state for top-level comments.

  Add styling function definitions
  I.e. identifiers after the "fn" keywords.

Fixes #237.
@nyamatongwe nyamatongwe added the committed Issue fixed in repository but not in release label Sep 14, 2024
@nyamatongwe
Copy link
Member

Committed with some minor edits. The ID number clashed with SCLEX_DART so it was incremented to be unique. Changed comment from "ZIG" to "Zig" as this value is commonly the name used by the language's web site and that would be appropriate for UI. Changed header inclusion order as this is defined in scripts/HeaderOrder.txt and checked by scintilla/scripts/HeaderCheck.py.

@nyamatongwe nyamatongwe removed the committed Issue fixed in repository but not in release label Dec 31, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
zig Caused by the Zig lexer
Projects
None yet
Development

Successfully merging this pull request may close these issues.

[lexer] [feature request] add Zig support
3 participants