Fix Newline-Separated Full-Line Comment Handling

Handle formatting of full-line comments separated by a blank line, like:

    // First Comment

    // Second Comment
octoberswimmer · Jan 7, 2025 · 9b55e31 · 9b55e31
1 parent 6da262b
commit 9b55e31
Show file tree

Hide file tree

Showing 7 changed files with 609 additions and 487 deletions.
diff --git a/formatter/comments_test.go b/formatter/comments_test.go
@@ -1,6 +1,7 @@
 package formatter
 
 import (
+	"fmt"
 	"testing"
 
 	"github.com/antlr4-go/antlr/v4"
@@ -12,6 +13,9 @@ import (
 func TestComments(t *testing.T) {
 	if testing.Verbose() {
 		log.SetLevel(log.DebugLevel)
+		log.SetFormatter(&log.TextFormatter{
+			DisableQuote: true,
+		})
 	}
 	tests :=
 		[]struct {
@@ -75,32 +79,36 @@ System.debug('I am on a separate line!');`,
 		contact.MailingCountry == 'United States');`,
 			},
 		}
-	for _, tt := range tests {
-		input := antlr.NewInputStream(tt.input)
-		lexer := parser.NewApexLexer(input)
-		stream := antlr.NewCommonTokenStream(lexer, antlr.TokenDefaultChannel)
-
-		p := parser.NewApexParser(stream)
-		p.RemoveErrorListeners()
-		p.AddErrorListener(&testErrorListener{t: t})
-
-		v := NewFormatVisitor(stream)
-		out, ok := v.visitRule(p.Statement()).(string)
-		if !ok {
-			t.Errorf("Unexpected result parsing apex")
-		}
-		out = removeExtraCommentIndentation(out)
-		if out != tt.output {
-			t.Errorf("unexpected format.  expected:\n%q\ngot:\n%q\n", tt.output, out)
-		}
+	for i, tt := range tests {
+		t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
+			input := antlr.NewInputStream(tt.input)
+			lexer := parser.NewApexLexer(input)
+			stream := antlr.NewCommonTokenStream(lexer, antlr.TokenDefaultChannel)
+
+			p := parser.NewApexParser(stream)
+			p.RemoveErrorListeners()
+			p.AddErrorListener(&testErrorListener{t: t})
+
+			v := NewFormatVisitor(stream)
+			out, ok := v.visitRule(p.Statement()).(string)
+			if !ok {
+				t.Errorf("Unexpected result parsing apex")
+			}
+			out = removeExtraCommentIndentation(out)
+			if out != tt.output {
+				t.Errorf("unexpected format.  expected:\n%q\ngot:\n%q\n", tt.output, out)
+			}
+		})
 	}
 
 }
 
 func TestTrailingComments(t *testing.T) {
 	if testing.Verbose() {
 		log.SetLevel(log.DebugLevel)
-
+		log.SetFormatter(&log.TextFormatter{
+			DisableQuote: true,
+		})
 	}
 	tests :=
 		[]struct {
@@ -176,26 +184,44 @@ private class T1Exception {}`,
 
 	// Blank line before comment
 	private Integer i;
+}`,
+			},
+			{
+				`class TestClass {
+	public static void go() {
+	// First Comment
+
+	// Second Comment
+go();}}`,
+				`class TestClass {
+	public static void go() {
+		// First Comment
+
+		// Second Comment
+		go();
+	}
 }`,
 			},
 		}
-	for _, tt := range tests {
-		input := antlr.NewInputStream(tt.input)
-		lexer := parser.NewApexLexer(input)
-		stream := antlr.NewCommonTokenStream(lexer, antlr.TokenDefaultChannel)
-
-		p := parser.NewApexParser(stream)
-		p.RemoveErrorListeners()
-		p.AddErrorListener(&testErrorListener{t: t})
-
-		v := NewFormatVisitor(stream)
-		out, ok := v.visitRule(p.CompilationUnit()).(string)
-		if !ok {
-			t.Errorf("Unexpected result parsing apex")
-		}
-		out = removeExtraCommentIndentation(out)
-		if out != tt.output {
-			t.Errorf("unexpected format.  expected:\n%q\ngot:\n%q\n", tt.output, out)
-		}
+	for i, tt := range tests {
+		t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
+			input := antlr.NewInputStream(tt.input)
+			lexer := parser.NewApexLexer(input)
+			stream := antlr.NewCommonTokenStream(lexer, antlr.TokenDefaultChannel)
+
+			p := parser.NewApexParser(stream)
+			p.RemoveErrorListeners()
+			p.AddErrorListener(&testErrorListener{t: t})
+
+			v := NewFormatVisitor(stream)
+			out, ok := v.visitRule(p.CompilationUnit()).(string)
+			if !ok {
+				t.Errorf("Unexpected result parsing apex")
+			}
+			out = removeExtraCommentIndentation(out)
+			if out != tt.output {
+				t.Errorf("unexpected format.  expected:\n%q\ngot:\n%q\n", tt.output, out)
+			}
+		})
 	}
 }
diff --git a/formatter/formatter.go b/formatter/formatter.go
@@ -11,6 +11,7 @@ import (
 
 	"github.com/antlr4-go/antlr/v4"
 	"github.com/octoberswimmer/apexfmt/parser"
+	log "github.com/sirupsen/logrus"
 )
 
 type Formatter struct {
@@ -106,6 +107,8 @@ func (f *Formatter) Write() error {
 	return writeFile(f.filename, f.formatted)
 }
 
+// removeIndentationFromComment removes extra tabs that were introduced during
+// formatting from a single multi-line comment.
 func removeIndentationFromComment(comment string) string {
 	// Find the position of the initial \uFFFA and the final \uFFFB
 	startIndex := strings.Index(comment, "\uFFFA")
@@ -139,17 +142,43 @@ func removeIndentationFromComment(comment string) string {
 	return unindented
 }
 
-// Comments are annotated in FormatVisitor.visitRule.  We preserve whitespace
-// within multi-line comments by removing the indentation added within the
-// comment.
+// removeExtraCommentIndentation cleans up the formatting of comments after the
+// formatter has run.
+//
+// This could probably be improved by rethinking the approach.  Preserving
+// comments is tricky.
+//
+// The antlr lexer pulls comments into a separate token stream so we don't need
+// to check for comments in every visit function.  Instead, we look for
+// comments, each represented as a single token, before the start of or after
+// the end of the current parser node.  Then we reinject the comments as we're
+// visiting each node.
+//
+// The visitor functions don't know about the comments so they introduce
+// whitespace around them when formatting and indenting the code.  We need to
+// ensure that the comments don't end up mangled.  We wrap the comments in
+// delimiters so we can easily identify the comments and clean up after the
+// formatter runs.  This code cleans up the whitespace and removes the comment
+// delimiters.
 func removeExtraCommentIndentation(input string) string {
+	log.Trace(fmt.Sprintf("ADJUSTING  : %q", input))
 	// Remove extra grammar-specific newlines added unaware of newline-preserving comments injected
 	newlinePrefixedMultilineComment := regexp.MustCompile("[\n ]*(\t*\uFFFA)")
 	input = newlinePrefixedMultilineComment.ReplaceAllString(input, "$1")
+	log.Trace(fmt.Sprintf("ADJUSTED(1): %q", input))
 
 	// Remove extra grammar-specific space added unaware of newline-preserving comments injected
 	spacePaddedMultilineComment := regexp.MustCompile(`(` + "\uFFFB\n*\t*" + `) +`)
 	input = spacePaddedMultilineComment.ReplaceAllString(input, "$1")
+	log.Trace(fmt.Sprintf("ADJUSTED(2): %q", input))
+
+	// Remove extra indent-injected newlines
+	indentInjectedNewlines := regexp.MustCompile("\uFFFB\n+")
+	input = indentInjectedNewlines.ReplaceAllString(input, "\uFFFB\n")
+	log.Trace(fmt.Sprintf("ADJUSTED(3): %q", input))
+
+	input = strings.ReplaceAll(input, "\n\uFFFB\n", "\n\uFFFB")
+	log.Trace(fmt.Sprintf("ADJUSTED(4): %q", input))
 
 	newlinePrefixedInlineComment := regexp.MustCompile("\n\t*\uFFF9\n")
 	input = newlinePrefixedInlineComment.ReplaceAllString(input, "\uFFF9\n")
@@ -161,6 +190,7 @@ func removeExtraCommentIndentation(input string) string {
 	// Restore formatting of indented multi-line comments
 	multilineCommentPattern := regexp.MustCompile(`(?s)\t*` + "\uFFFA" + `.*?` + "\uFFFB")
 	unindented := multilineCommentPattern.ReplaceAllStringFunc(input, removeIndentationFromComment)
+	log.Trace(fmt.Sprintf("UNINDENTED : %q", input))
 
 	return unindented
 }

diff --git a/formatter/indent_test.go b/formatter/indent_test.go
@@ -3,10 +3,19 @@ package formatter
 import (
 	"bufio"
 	"bytes"
+	"fmt"
 	"testing"
+
+	log "github.com/sirupsen/logrus"
 )
 
 func TestIndent(t *testing.T) {
+	if testing.Verbose() {
+		log.SetLevel(log.DebugLevel)
+		log.SetFormatter(&log.TextFormatter{
+			DisableQuote: true,
+		})
+	}
 	tests :=
 		[]struct {
 			input  string
@@ -34,12 +43,18 @@ func TestIndent(t *testing.T) {
 				"public class B {\n\t\ufffa\n\t/**\n\t\t\t */\n\ufffb\tpublic X(Y client) {}\n}",
 				"\tpublic class B {\n\t\t\ufffa\n\t\t/**\n\t\t\t\t */\ufffb\n\t\tpublic X(Y client) {}\n\t}",
 			},
+			{
+				"\ufffa\n// First Comment\n\n\ufffb\ufffa// Second Comment\n\ufffbgo();",
+				"\t\ufffa\n\t// First Comment\n\ufffb\n\t\ufffa// Second Comment\n\ufffb\n\tgo();",
+			},
 		}
-	for _, tt := range tests {
-		out := indent(tt.input)
-		if out != tt.output {
-			t.Errorf("unexpected indent format.  expected:\n%q\ngot:\n%q\n", tt.output, out)
-		}
+	for i, tt := range tests {
+		t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
+			out := indent(tt.input)
+			if out != tt.output {
+				t.Errorf("unexpected indent format.  expected:\n%q\ngot:\n%q\n", tt.output, out)
+			}
+		})
 	}
 }
 
@@ -81,6 +96,12 @@ func TestRemoveIndentation(t *testing.T) {
 }
 
 func TestSplitLeadingFFFAOrFFFBOrNewline(t *testing.T) {
+	if testing.Verbose() {
+		log.SetLevel(log.DebugLevel)
+		log.SetFormatter(&log.TextFormatter{
+			DisableQuote: true,
+		})
+	}
 	testCases := []struct {
 		name     string
 		input    string
@@ -134,11 +155,10 @@ func TestSplitLeadingFFFAOrFFFBOrNewline(t *testing.T) {
 		},
 		{
 			name:  "Delimiter with content on the same line",
-			input: "public class B {\n\t\ufffa some content\n}",
+			input: "public class B {\n\t\ufffa // some content\ufffb\n}",
 			expected: []string{
 				"public class B {",
-				"\t\ufffa",
-				" some content",
+				"\t\ufffa // some content\ufffb",
 				"}",
 			},
 		},
@@ -210,13 +230,11 @@ func TestSplitLeadingFFFAOrFFFBOrNewline(t *testing.T) {
 			},
 		},
 		{
-			name:  "Delimiter in the middle of the line (should split)",
-			input: "public class B {\n\tpublic \ufffa X(Y client) {}\n}",
+			name:  "Delimiter in the middle of the line",
+			input: "public class B {\n\tpublic \ufff9 /* inline comment */ \ufffb X(Y client) {}\n}",
 			expected: []string{
 				"public class B {",
-				"\tpublic ",
-				"\ufffa",
-				" X(Y client) {}",
+				"\tpublic \ufff9 /* inline comment */ \ufffb X(Y client) {}",
 				"}",
 			},
 		},
@@ -278,6 +296,14 @@ func TestSplitLeadingFFFAOrFFFBOrNewline(t *testing.T) {
 				"}",
 			},
 		},
+		{
+			name:  "Include content after \\ufffa",
+			input: "\ufffa// Second Comment\n\ufffbgo();",
+			expected: []string{
+				"\ufffa// Second Comment\n\ufffb",
+				"go();",
+			},
+		},
 	}
 
 	for _, tc := range testCases {
@@ -294,8 +320,8 @@ func TestSplitLeadingFFFAOrFFFBOrNewline(t *testing.T) {
 			}
 			if len(tokens) != len(tc.expected) {
 				t.Errorf("Expected %d tokens, got %d", len(tc.expected), len(tokens))
-				t.Errorf("Expected tokens: %v", tc.expected)
-				t.Errorf("Got tokens: %v", tokens)
+				t.Errorf("Expected tokens: %+v", tc.expected)
+				t.Errorf("Got tokens: %+v", tokens)
 				return
 			}
 			for i, expected := range tc.expected {