Backport Zig lexer from Notepad4 #267

techee · 2024-08-22T20:54:39Z

This patch adds the Zig lexer from the Notepad4 editor originally created by Zufu Liu (@zufuliu).

Some changes have been made to make it compile and work in Scintilla since Notepad4 contains a modified Scintilla version. Also, some features of the Notepad4 lexer have been removed/changed:

special highlighting of formatting strings has been removed as it uses some extra functions added to Notepad4's Scintilla and I didn't want to spend much time on figuring out what would have to be back-ported - other lexilla lexers don't do this either anyway
folding has been modified to use simple folding like the rest of Scintilla lexers and not folding of the previous line based on brace presence on the next line like Notepad4
"semi-syntactic" coloring of Notepad4 which colors functions following "fn" has been removed as this is not performed in other Scintilla lexers
highlighting of tasks such as TODOs in comments has been removed as it isn't present in other Scintilla lexers
coloring states and keywords have been renamed slightly - keywords at index 3 have been reserved for caller-supplied type names using SCI_SETKEYWORDS like in the C lexer (which we use in Geany to supply type names obtained using ctags)

Full diff is here:

Full diff

--- notepad4/scintilla/lexers/LexZig.cxx	2024-08-21 11:44:51.924318156 +0200
+++ lexilla/lexers/LexZig.cxx	2024-08-22 20:26:47.832211501 +0200
@@ -1,10 +1,15 @@
-// This file is part of Notepad4.
-// See License.txt for details about distribution and modification.
-//! Lexer for Zig
+// Scintilla source code edit control
+/** @file LexZig.cxx
+ ** Lexer for Zig language.
+ **/
+// Based on Zufu Liu's Notepad4 Zig lexer
+// Modified for Scintilla by Jiri Techet, 2024
+// The License.txt file describes the conditions under which this software may be distributed.
 
 #include <cassert>
 #include <cstring>
 
+#include <algorithm>
 #include <string>
 #include <string_view>
 
@@ -17,12 +22,46 @@
 #include "Accessor.h"
 #include "StyleContext.h"
 #include "CharacterSet.h"
-#include "StringUtils.h"
 #include "LexerModule.h"
 
 using namespace Lexilla;
 
 namespace {
+// Use an unnamed namespace to protect the functions and classes from name conflicts
+
+constexpr bool IsAGraphic(int ch) noexcept {
+	// excludes C0 control characters and whitespace
+	return ch > 32 && ch < 127;
+}
+
+constexpr bool IsIdentifierStart(int ch) noexcept {
+	return IsUpperOrLowerCase(ch) || ch == '_';
+}
+
+constexpr bool IsIdentifierStartEx(int ch) noexcept {
+	return IsIdentifierStart(ch) || ch >= 0x80;
+}
+
+constexpr bool IsNumberStart(int ch, int chNext) noexcept {
+	return IsADigit(ch) || (ch == '.' && IsADigit(chNext));
+}
+
+constexpr bool IsIdentifierChar(int ch) noexcept {
+	return IsAlphaNumeric(ch) || ch == '_';
+}
+
+constexpr bool IsNumberContinue(int chPrev, int ch, int chNext) noexcept {
+	return ((ch == '+' || ch == '-') && (chPrev == 'e' || chPrev == 'E'))
+		|| (ch == '.' && chNext != '.');
+}
+
+constexpr bool IsDecimalNumber(int chPrev, int ch, int chNext) noexcept {
+	return IsIdentifierChar(ch) || IsNumberContinue(chPrev, ch, chNext);
+}
+
+constexpr bool IsIdentifierCharEx(int ch) noexcept {
+	return IsIdentifierChar(ch) || ch >= 0x80;
+}
 
 // https://ziglang.org/documentation/master/#Escape-Sequences
 struct EscapeSequence {
@@ -48,113 +87,33 @@
 	}
 	bool atEscapeEnd(int ch) noexcept {
 		--digitsLeft;
-		return digitsLeft <= 0 || !IsHexDigit(ch);
+		return digitsLeft <= 0 || !IsAHeXDigit(ch);
 	}
 };
 
-// https://ziglang.org/documentation/master/std/#std.fmt.format
-enum class FormatArgument {
-	None,
-	Digit,
-	Identifier,
-	Error,
-};
-
-constexpr bool IsBraceFormatSpecifier(int ch) noexcept {
-	return AnyOf(ch, 'b',
-					'c',
-					'd',
-					'e',
-					'f',
-					'g',
-					'o',
-					's',
-					'u',
-					'x', 'X',
-					'?', '!', '*', 'a');
-}
-
-constexpr bool IsBraceFormatNext(int ch) noexcept {
-	return ch == '}' || IsADigit(ch) || ch == '[' || ch == ':' || ch == '.'
-		|| IsBraceFormatSpecifier(ch);
-}
-
-constexpr bool IsFormatArgument(int ch, FormatArgument fmtArgument) noexcept {
-	return IsADigit(ch) || (fmtArgument == FormatArgument::Identifier && IsIdentifierCharEx(ch));
-}
-
-inline Sci_Position CheckBraceFormatSpecifier(const StyleContext &sc, LexAccessor &styler) noexcept {
-	Sci_PositionU pos = sc.currentPos;
-	char ch = static_cast<char>(sc.ch);
-	// [specifier]
-	if (IsBraceFormatSpecifier(sc.ch)) {
-		++pos;
-		if (sc.Match('a', 'n', 'y')) {
-			pos += 2;
-		}
-		ch = styler[pos];
-		if (!AnyOf(ch, ':', '.', '}', '<', '>', '^')) {
-			return 0;
-		}
-	}
-	if (ch == ':') {
-		ch = styler[++pos];
-	}
-	// [[fill] alignment]
-	if (!AnyOf(ch, '\r', '\n', '{', '}')) {
-		Sci_Position width = 1;
-		if (ch & 0x80) {
-			styler.GetCharacterAndWidth(pos, &width);
-		}
-		const char chNext = styler[pos + width];
-		if (AnyOf(ch, '<', '>', '^') || AnyOf(chNext, '<', '>', '^')) {
-			pos += 1 + width;
-			ch = styler[pos];
-		}
-	}
-	// [width]
-	while (IsADigit(ch)) {
-		ch = styler[++pos];
-	}
-	// [.precision]
-	if (ch == '.') {
-		ch = styler[++pos];
-		while (IsADigit(ch)) {
-			ch = styler[++pos];
-		}
-	}
-	if (ch == '}') {
-		return pos - sc.currentPos;
-	}
-	return 0;
-}
-
 enum {
 	ZigLineStateMaskLineComment = 1, // line comment
 	ZigLineStateMaskMultilineString = 1 << 1, // multiline string
 };
 
-//KeywordIndex++Autogenerated -- start of section automatically generated
 enum {
-	KeywordIndex_Keyword = 0,
-	KeywordIndex_Type = 1,
+	KeywordIndex_Primary = 0,
+	KeywordIndex_Secondary = 1,
+	KeywordIndex_Tertiary = 2,
+	KeywordIndex_Type = 3,
 };
-//KeywordIndex--Autogenerated -- end of section automatically generated
 
-enum class KeywordType {
-	None = SCE_ZIG_DEFAULT,
-	Function = SCE_ZIG_FUNCTION_DEFINITION,
+const char *const zigWordListDesc[] = {
+	"Primary keywords",
+	"Secondary keywords",
+	"Tertiary keywords",
+	"Global type definitions",
+	nullptr
 };
 
-constexpr bool IsSpaceEquiv(int state) noexcept {
-	return state <= SCE_ZIG_TASKMARKER;
-}
-
-void ColouriseZigDoc(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, LexerWordList keywordLists, Accessor &styler) {
-	KeywordType kwType = KeywordType::None;
+void ColouriseZigDoc(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, WordList *keywordLists[], Accessor &styler) {
 	int visibleChars = 0;
 	int lineState = 0;
-	FormatArgument fmtArgument = FormatArgument::None;
 	EscapeSequence escSeq;
 
 	StyleContext sc(startPos, lengthDoc, initStyle, styler);
@@ -177,23 +136,16 @@
 				if (sc.state == SCE_ZIG_IDENTIFIER) {
 					char s[128];
 					sc.GetCurrent(s, sizeof(s));
-					if (keywordLists[KeywordIndex_Keyword].InList(s)) {
-						sc.ChangeState(SCE_ZIG_WORD);
-						kwType = KeywordType::None;
-						if (StrEqual(s, "fn")) {
-							kwType = KeywordType::Function;
-						}
-					} else if (keywordLists[KeywordIndex_Type].InList(s)) {
-						sc.ChangeState(SCE_ZIG_TYPE);
-					} else if (kwType != KeywordType::None) {
-						sc.ChangeState(static_cast<int>(kwType));
-					} else if (sc.GetLineNextChar() == '(') {
-						sc.ChangeState(SCE_ZIG_FUNCTION);
+					if (keywordLists[KeywordIndex_Primary]->InList(s)) {
+						sc.ChangeState(SCE_ZIG_KW_PRIMARY);
+					} else if (keywordLists[KeywordIndex_Secondary]->InList(s)) {
+						sc.ChangeState(SCE_ZIG_KW_SECONDARY);
+					} else if (keywordLists[KeywordIndex_Tertiary]->InList(s)) {
+						sc.ChangeState(SCE_ZIG_KW_TERTIARY);
+					} else if (keywordLists[KeywordIndex_Type]->InList(s)) {
+						sc.ChangeState(SCE_ZIG_KW_TYPE);
 					}
 				}
-				if (sc.state != SCE_ZIG_WORD) {
-					kwType = KeywordType::None;
-				}
 				sc.SetState(SCE_ZIG_DEFAULT);
 			}
 			break;
@@ -219,18 +171,6 @@
 						escSeq.resetEscapeState(sc.state);
 						sc.SetState(SCE_ZIG_ESCAPECHAR);
 						sc.Forward();
-                    } else if (sc.ch == '{' && IsBraceFormatNext(sc.chNext)) {
-                    	escSeq.outerState = sc.state;
-                    	sc.SetState(SCE_ZIG_PLACEHOLDER);
-                    	fmtArgument = FormatArgument::None;
-                    	if (IsADigit(sc.chNext)) {
-                    		fmtArgument = FormatArgument::Digit;
-                    	} else if (sc.chNext == '[') {
-                    		fmtArgument = FormatArgument::Identifier;
-							if (IsIdentifierStartEx(sc.GetRelative(2))) {
-								sc.Forward();
-							}
-                    	}
                     }
                 }
 			}
@@ -246,34 +186,6 @@
 			}
 			break;
 
-		case SCE_ZIG_PLACEHOLDER:
-			if (!IsFormatArgument(sc.ch, fmtArgument)) {
-				if (fmtArgument == FormatArgument::Identifier) {
-					if (sc.ch == ']') {
-						sc.Forward();
-					} else {
-						fmtArgument = FormatArgument::Error;
-					}
-				}
-				if (fmtArgument != FormatArgument::Error) {
-					const Sci_Position length = CheckBraceFormatSpecifier(sc, styler);
-					if (length != 0) {
-						sc.SetState(SCE_ZIG_FORMAT_SPECIFIER);
-						sc.Advance(length);
-						sc.SetState(SCE_ZIG_PLACEHOLDER);
-						sc.ForwardSetState(escSeq.outerState);
-						continue;
-					}
-				}
-				if (fmtArgument == FormatArgument::Error || sc.ch != '}') {
-					sc.Rewind();
-					sc.ChangeState(escSeq.outerState);
-				}
-				sc.ForwardSetState(escSeq.outerState);
-				continue;
-			}
-			break;
-
 		case SCE_ZIG_COMMENTLINE:
 		case SCE_ZIG_COMMENTLINEDOC:
 			if (sc.atLineStart) {
@@ -314,7 +226,6 @@
 		if (sc.atLineEnd) {
 			styler.SetLineState(sc.currentLine, lineState);
 			lineState = 0;
-			kwType = KeywordType::None;
 			visibleChars = 0;
 		}
 		sc.Forward();
@@ -332,25 +243,20 @@
 	}
 };
 
-void FoldZigDoc(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, LexerWordList /*keywordLists*/, Accessor &styler) {
+void FoldZigDoc(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, WordList *[] /*keywordLists*/, Accessor &styler) {
 	const Sci_PositionU endPos = startPos + lengthDoc;
-	Sci_Line lineCurrent = styler.GetLine(startPos);
+	Sci_Position lineCurrent = styler.GetLine(startPos);
 	FoldLineState foldPrev(0);
 	int levelCurrent = SC_FOLDLEVELBASE;
 	if (lineCurrent > 0) {
 		levelCurrent = styler.LevelAt(lineCurrent - 1) >> 16;
 		foldPrev = FoldLineState(styler.GetLineState(lineCurrent - 1));
-		const Sci_PositionU bracePos = CheckBraceOnNextLine(styler, lineCurrent - 1, SCE_ZIG_OPERATOR, SCE_ZIG_TASKMARKER);
-		if (bracePos) {
-			startPos = bracePos + 1; // skip the brace
-		}
 	}
 
 	int levelNext = levelCurrent;
 	FoldLineState foldCurrent(styler.GetLineState(lineCurrent));
 	Sci_PositionU lineStartNext = styler.LineStart(lineCurrent + 1);
-	lineStartNext = sci::min(lineStartNext, endPos);
-	int visibleChars = 0;
+	lineStartNext = std::min(lineStartNext, endPos);
 
 	while (startPos < endPos) {
 		initStyle = styler.StyleAt(startPos);
@@ -364,23 +270,14 @@
 			}
 		}
 
-		if (visibleChars == 0 && !IsSpaceEquiv(initStyle)) {
-			++visibleChars;
-		}
 		++startPos;
 		if (startPos == lineStartNext) {
 			const FoldLineState foldNext(styler.GetLineState(lineCurrent + 1));
-			levelNext = sci::max(levelNext, SC_FOLDLEVELBASE);
+			levelNext = std::max(levelNext, SC_FOLDLEVELBASE);
 			if (foldCurrent.lineComment) {
 				levelNext += foldNext.lineComment - foldPrev.lineComment;
 			} else if (foldCurrent.multilineString) {
 				levelNext += foldNext.multilineString - foldPrev.multilineString;
-			} else if (visibleChars) {
-				const Sci_PositionU bracePos = CheckBraceOnNextLine(styler, lineCurrent, SCE_ZIG_OPERATOR, SCE_ZIG_TASKMARKER);
-				if (bracePos) {
-					levelNext++;
-					startPos = bracePos + 1; // skip the brace
-				}
 			}
 
 			const int levelUse = levelCurrent;
@@ -392,15 +289,14 @@
 
 			lineCurrent++;
 			lineStartNext = styler.LineStart(lineCurrent + 1);
-			lineStartNext = sci::min(lineStartNext, endPos);
+			lineStartNext = std::min(lineStartNext, endPos);
 			levelCurrent = levelNext;
 			foldPrev = foldCurrent;
 			foldCurrent = foldNext;
-			visibleChars = 0;
 		}
 	}
 }
 
-}
+}  // unnamed namespace end
 
-extern const LexerModule lmZig(SCLEX_ZIG, ColouriseZigDoc, "zig", FoldZigDoc);
+extern const LexerModule lmZig(SCLEX_ZIG, ColouriseZigDoc, "zig", FoldZigDoc, zigWordListDesc);

Note: Fear not, this is the last lexer I'm planning to steal from Notepad4 :-).

Fixes #237.

techee · 2024-08-22T20:58:07Z

I was hesitating a bit about whether Zig requires a special lexer or whether it would be enough to use e.g. the C lexer for it, but there are the @-introduced builtin functions and the rather special multiline strings starting with \\, so in the end I think a new lexer is justified.

techee · 2024-08-23T09:35:11Z

"semi-syntactic" coloring of Notepad4 which colors functions following "fn" has been removed as this is not performed in other Scintilla lexers

Should be no problem to add back if this is the preferred behavior for Scintilla.

nyamatongwe · 2024-08-24T03:38:46Z

Zig documentation emphasizes that Doc Comments and Top Level Doc Comments are different so I expect users would want these differentiated.

// Comments in Zig start with "//" and end at the next LF byte (end of line).

/// A structure for storing a timestamp, with nanosecond precision (this is a
/// multiline doc comment).

//! This module provides functions for retrieving the current date and
//! time with varying degrees of precision and accuracy.

nyamatongwe · 2024-08-24T04:07:49Z

This character literal escape appears to lex incorrectly with the '9}' not in escape style. There doesn't appear to be a limit on number of digits for a {...} type of escape.

'\u{1f4a9}'

\u{NNNNNN} | hexadecimal Unicode scalar value UTF-8 encoded (1 or more digits)

zufuliu · 2024-08-24T14:05:40Z

This character literal escape appears to lex incorrectly

Setting escSeq.digitsLeft = 9; (or a larger value) after escSeq.brace = true; can fix this. Any number of leading zeros is allowed, but 8 hex digits (the documentation uses 6) should be good enough for a UTF-32 hex value. A more complex fix may look like the following:

bool atEscapeEnd(int ch) noexcept {
	--digitsLeft;
	return (!brace && digitsLeft <= 0) || !IsHexDigit(ch);
}

techee · 2024-08-26T20:39:59Z

"semi-syntactic" coloring of Notepad4 which colors functions following "fn" has been removed as this is not performed in other Scintilla lexers

Should be no problem to add back if this is the preferred behavior for Scintilla.

I added it back - unlike Dart, detection of function definitions is trivial (they always follow the fn keyword) and also the Zig documentation shows function names highlighted so it's probably the right thing to do.

Zig documentation emphasizes that Doc Comments and Top Level Doc Comments are different so I expect users would want these differentiated.

Done (based on @zufuliu's code).

This character literal escape appears to lex incorrectly with the '9}' not in escape style. There doesn't appear to be a limit on number of digits for a {...} type of escape.

Done. I used the lazy version escSeq.digitsLeft = 9 so up to 8 hex digits are possible.

techee · 2024-08-26T20:45:36Z

Assuming that the Dart lexer gets merged first, I'll wait until it's merged and rebase this branch on top of it to resolve the conflicts.

techee · 2024-09-03T16:23:54Z

Assuming that the Dart lexer gets merged first, I'll wait until it's merged and rebase this branch on top of it to resolve the conflicts.

Just to be sure we don't wait for each other here - I more or less assumed that

#265 (comment)

applies here too so I didn't perform the rebase. If something is needed from me for this PR, please let me know.

techee · 2024-09-10T22:40:08Z

@nyamatongwe Should I convert this one to the object lexer too?

nyamatongwe · 2024-09-11T02:36:50Z

It would be best if the Zig lexer is also an object lexer.

techee · 2024-09-11T07:55:26Z

It would be best if the Zig lexer is also an object lexer.

OK, I'll have a look at it.

techee · 2024-09-11T19:28:43Z

OK, I'll have a look at it.

Done, please let me know if some changes are needed.

I'm not sure whether

12, "SCE_ZIG_BUILTIN_FUNCTION", "identifier", "Builtin function",

is the correct tag or some different tag should be used (builtin functions start with @ which, strictly speaking, isn't an identifier character).

nyamatongwe · 2024-09-11T23:56:17Z

Builtin functions are mostly similar to other languages standard library functions so should allow applications to provide similar behaviour by specifying identifier as primary tag.

The use of syntax to distinguish builtins is an example where concepts don't cleanly match between languages. There could be additional tags to refine identifier as there could be for similar situations such as Python which has distinct function and class identifier styles.

nyamatongwe · 2024-09-12T10:00:37Z

Most of this is working well but the folding isn't stable: it changes based on the ranges that are lexed. Open AllStyles.zig in SciTE, press Ctrl+End to go to the end and the folding will be balanced. Go back to the top, close and reopen the file. Then scroll down line by line and some of the folding structure will be wrong - for me, it commonly is unbalanced starting from line 112 where the SCE_ZIG_MULTISTRING multiline string should have a start fold. If this element is lexed together (by pressing Page Down) another unbalanced fold occurs at line 204.

I may commit the lexer in this state, perhaps with folding turned off, to provide a basis for further work.

This set of style definitions helps see the code for testing but it's not good enough to distribute.

style.zig.0=fore:#000000
style.zig.1=fore:#008000,italics
style.zig.2=fore:#008000,back:#F0FFF0
style.zig.3=fore:#00B0B0
style.zig.4=fore:#004080,bold
style.zig.5=fore:#A0A000
style.zig.6=fore:#800080
style.zig.7=fore:#800080,back:#FFF0FF
style.zig.8=fore:#804000,back:#FFF0FF
style.zig.9=fore:#000000
style.zig.10=fore:#A000A0
style.zig.11=fore:#000089,bold
style.zig.12=fore:#B00060,bold
style.zig.13=fore:#0070B0,italics
style.zig.14=fore:#60A000
style.zig.15=fore:#0060A0
style.zig.16=fore:#A00060

zufuliu · 2024-09-12T11:21:36Z

Most of this is working well but the folding isn't stable: it changes based on the ranges that are lexed.

Likely same bug as Bash comment line folding (issue #224), Dart lexer has same bug:

Backtrack one line like Bash should fix the bug (backtracking is done inside LexerModule::Fold() for function lexers).

lexilla/lexers/LexBash.cxx

Lines 1195 to 1201 in 75ae907

    
           Sci_Position lineCurrent = styler.GetLine(startPos); 
        
           // Backtrack to previous line in case need to fix its fold status 
        
           if (lineCurrent > 0) { 
        
           	lineCurrent--; 
        
           	startPos = styler.LineStart(lineCurrent); 
        
           	initStyle = (startPos > 0) ? styler.StyleIndexAt(startPos - 1) : 0; 
        
           }

techee · 2024-09-12T18:53:21Z

@zufuliu Thanks, it works!

(To be clear, I don't know anything about how folding works and did just Ctrl+C Ctrl+V here.)

Dart lexer has same bug:

I'll try to reproduce it and if the bug is present, I'll submit the same patch for the Dart lexer too.

techee · 2024-09-12T19:11:46Z

Alright, I have one quite interesting observation - I first accidentally checked out the Dart lexer branch which wasn't converted to the object lexer and I couldn't reproduce the problem there. But once I realized it and switched to the branch using the object lexer, the problem appeared.

Is there some difference between start line numbers between object/non-object lexers that could cause this? Isn't this a bug somewhere in Scintilla/Lexilla?

techee · 2024-09-12T19:32:28Z

I just added a printf() printing the line number just behind the initial

	Sci_Position lineCurrent = styler.GetLine(startPos);

in Lex() both in the non-object and object lexer. Then I started SciTE, moved the caret down the screen, and the first movement down that caused the editor to scroll was:

line 46 for non-object lexer
line 47 for object lexer

So something somewhere must cause this difference by 1 which I guess is not intended.

nyamatongwe · 2024-09-12T22:26:28Z

For historical reasons, the adapter code LexerModule::Fold that translates between the ILexer methods and function lexers moves back one line from the start position. This is less efficient as it's folding an extra line each time.
It does cover up some folding problems but not all. It is generally better to move back to the start of the multi-line construct.

techee · 2024-09-13T07:48:22Z

For historical reasons, the adapter code LexerModule::Fold that translates between the ILexer methods and function lexers moves back one line from the start position. This is less efficient as it's folding an extra line each time.
It does cover up some folding problems but not all. It is generally better to move back to the start of the multi-line construct.

Alright, so what should I do in the Zig and Dart case? Backtrack 1 line or to the beginning of the multiline string/comment?

… document and lexer object from successful whole document fold for the per-line fold. Fixed by creating a new document and lexer for the per-line lexing and folding. This showed there are problems with the current Dart, F#, and Julia folders as well as the new Zig lexer in #267.

nyamatongwe · 2024-09-13T11:31:36Z

what should I do in the Zig and Dart case? Backtrack 1 line or to the beginning of the multiline string/comment?

Start of string/comment.

This should have been caught by the lexer testing program TestLexers which runs the lexer and folder over the file twice: once with the whole file in one go and once line-by-line. However, the same document object (with styling, fold levels, and line state) was reused for both runs and the existing data meant the line-by-line run produced the same results. This is now fixed with 3ace72f which also shows problems with Dart (and Julia and F#). To ensure this doesn't cause failures in CI, these are turned off for now for Dart, F# and Julia by adding testlexers.per.line.disable=1 to their properties.

techee · 2024-09-13T15:56:16Z

Start of string/comment.

For Dart I did this: #275

If it's considered OK, I'll do the same for Zig here (plus I'll rebase this PR on top of master to get the changes in the test suite).

nyamatongwe · 2024-09-13T23:58:24Z

The Dart change is OK although it could be shortened with AnyOf.

@zufuliu

This patch adds the Zig lexer from the Notepad4 editor originally created by Zufu Liu (@zufuliu). Some changes have been made to make it compile and work in Scintilla since Notepad4 contains a modified Scintilla version. Also, some features of the Notepad4 lexer have been removed/changed: - special highlighting of formatting strings has been removed as it uses some extra functions added to Notepad4's Scintilla and I didn't want to spend much time on figuring out what would have to be back-ported - other lexilla lexers don't do this either anyway - folding has been modified to use simple folding like the rest of Scintilla lexers and not folding of the previous line based on brace presence on the next line like Notepad4 - "semi-syntactic" coloring of Notepad4 which colors functions following "fn" has been removed as this is not performed in other Scintilla lexers - highlighting of tasks such as TODOs in comments has been removed as it isn't present in other Scintilla lexers - coloring states and keywords have been renamed slightly - keywords at index 3 have been reserved for caller-supplied type names using SCI_SETKEYWORDS like in the C lexer (which we use in Geany to supply type names obtained using ctags) Fixes ScintillaOrg#237.

I.e. identifiers after the "fn" keywords.

techee · 2024-09-14T12:08:16Z

If it's considered OK, I'll do the same for Zig here (plus I'll rebase this PR on top of master to get the changes in the test suite).

Done.

The Dart change is OK although it could be shortened with AnyOf.

Done for Zig.

@zufuliu

This patch adds the Zig lexer from the Notepad4 editor originally created by Zufu Liu (@zufuliu) and converts it to an object lexer. Some changes have been made to make it compile and work in Scintilla since Notepad4 contains a modified Scintilla version. Also, some features of the Notepad4 lexer have been removed/changed: - special highlighting of formatting strings has been removed as it uses some extra functions added to Notepad4's Scintilla and I didn't want to spend much time on figuring out what would have to be back-ported - other lexilla lexers don't do this either anyway - folding has been modified to use simple folding like the rest of Scintilla lexers and not folding of the previous line based on brace presence on the next line like Notepad4 - "semi-syntactic" coloring of Notepad4 which colors functions following "fn" has been removed as this is not performed in other Scintilla lexers - highlighting of tasks such as TODOs in comments has been removed as it isn't present in other Scintilla lexers - coloring states and keywords have been renamed slightly - keywords at index 3 have been reserved for caller-supplied type names using SCI_SETKEYWORDS like in the C lexer (which we use in Geany to supply type names obtained using ctags) Add SCE_ZIG_COMMENTLINETOP state for top-level comments. Add styling function definitions I.e. identifiers after the "fn" keywords. Fixes #237.

nyamatongwe · 2024-09-15T00:04:20Z

Committed with some minor edits. The ID number clashed with SCLEX_DART so incremented to be unique. Changed comment from "ZIG" to "Zig" as this value is commonly the name used by the language's web site and that would be appropriate for UI. Change header inclusion order as this is defined in scripts/HeaderOrder.txt and checked by scintilla/scripts/HeaderCheck.py.

nyamatongwe added the zig Caused by the Zig lexer label Aug 22, 2024

techee mentioned this pull request Aug 23, 2024

Thumbs up hunt geany/geany#3938

Open

9 tasks

ArkadiuszMichalski mentioned this pull request Aug 28, 2024

[Feature request] Support Zig? notepad-plus-plus/notepad-plus-plus#14772

Open

techee added 3 commits September 14, 2024 13:57

Add encoding preamble to unit test

0498abd

Reduce lexer identifier buffer size to 64

f4aa39b

techee added 8 commits September 14, 2024 13:57

Use StyleIndexAt() instead of StyleAt() in folding code

f876627

Add SCE_ZIG_COMMENTLINETOP state for top-level comments

2f232a1

Allow up to 8B unicode escape sequence

99989dd

Add styling function definitions

5a81225

I.e. identifiers after the "fn" keywords.

Convert the Zig lexer to object lexer

731712e

Fix folding multiline strings

2cc4c19

Backtrack to the beginning of multiline strings/comments for folding

b4b3889

Update LexillaGen.py output

a592f87

techee force-pushed the zig branch from fe186d7 to a592f87 Compare September 14, 2024 12:06

nyamatongwe added the committed Issue fixed in repository but not in release label Sep 14, 2024

nyamatongwe closed this Oct 19, 2024

nyamatongwe removed the committed Issue fixed in repository but not in release label Dec 31, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Backport Zig lexer from Notepad4 #267

Backport Zig lexer from Notepad4 #267

techee commented Aug 22, 2024

techee commented Aug 22, 2024

techee commented Aug 23, 2024

nyamatongwe commented Aug 24, 2024

nyamatongwe commented Aug 24, 2024

zufuliu commented Aug 24, 2024

techee commented Aug 26, 2024

techee commented Aug 26, 2024

techee commented Sep 3, 2024

techee commented Sep 10, 2024

nyamatongwe commented Sep 11, 2024

techee commented Sep 11, 2024

techee commented Sep 11, 2024

nyamatongwe commented Sep 11, 2024

nyamatongwe commented Sep 12, 2024

zufuliu commented Sep 12, 2024

techee commented Sep 12, 2024

techee commented Sep 12, 2024

techee commented Sep 12, 2024

nyamatongwe commented Sep 12, 2024

techee commented Sep 13, 2024

nyamatongwe commented Sep 13, 2024

techee commented Sep 13, 2024

nyamatongwe commented Sep 13, 2024

techee commented Sep 14, 2024

nyamatongwe commented Sep 15, 2024

Backport Zig lexer from Notepad4 #267

Backport Zig lexer from Notepad4 #267

Conversation

techee commented Aug 22, 2024

techee commented Aug 22, 2024

techee commented Aug 23, 2024

nyamatongwe commented Aug 24, 2024

nyamatongwe commented Aug 24, 2024

zufuliu commented Aug 24, 2024

techee commented Aug 26, 2024

techee commented Aug 26, 2024

techee commented Sep 3, 2024

techee commented Sep 10, 2024

nyamatongwe commented Sep 11, 2024

techee commented Sep 11, 2024

techee commented Sep 11, 2024

nyamatongwe commented Sep 11, 2024

nyamatongwe commented Sep 12, 2024

zufuliu commented Sep 12, 2024

techee commented Sep 12, 2024

techee commented Sep 12, 2024

techee commented Sep 12, 2024

nyamatongwe commented Sep 12, 2024

techee commented Sep 13, 2024

nyamatongwe commented Sep 13, 2024

techee commented Sep 13, 2024

nyamatongwe commented Sep 13, 2024

techee commented Sep 14, 2024

nyamatongwe commented Sep 15, 2024