Skip to content

Commit

Permalink
Fix Newline-Separated Full-Line Comment Handling
Browse files Browse the repository at this point in the history
Handle formatting of comments like:
// First Comment

// Second Comment
  • Loading branch information
cwarden committed Jan 7, 2025
1 parent 6da262b commit 9b55e31
Show file tree
Hide file tree
Showing 7 changed files with 609 additions and 487 deletions.
100 changes: 63 additions & 37 deletions formatter/comments_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package formatter

import (
"fmt"
"testing"

"github.com/antlr4-go/antlr/v4"
Expand All @@ -12,6 +13,9 @@ import (
func TestComments(t *testing.T) {
if testing.Verbose() {
log.SetLevel(log.DebugLevel)
log.SetFormatter(&log.TextFormatter{
DisableQuote: true,
})
}
tests :=
[]struct {
Expand Down Expand Up @@ -75,32 +79,36 @@ System.debug('I am on a separate line!');`,
contact.MailingCountry == 'United States');`,
},
}
for _, tt := range tests {
input := antlr.NewInputStream(tt.input)
lexer := parser.NewApexLexer(input)
stream := antlr.NewCommonTokenStream(lexer, antlr.TokenDefaultChannel)

p := parser.NewApexParser(stream)
p.RemoveErrorListeners()
p.AddErrorListener(&testErrorListener{t: t})

v := NewFormatVisitor(stream)
out, ok := v.visitRule(p.Statement()).(string)
if !ok {
t.Errorf("Unexpected result parsing apex")
}
out = removeExtraCommentIndentation(out)
if out != tt.output {
t.Errorf("unexpected format. expected:\n%q\ngot:\n%q\n", tt.output, out)
}
for i, tt := range tests {
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
input := antlr.NewInputStream(tt.input)
lexer := parser.NewApexLexer(input)
stream := antlr.NewCommonTokenStream(lexer, antlr.TokenDefaultChannel)

p := parser.NewApexParser(stream)
p.RemoveErrorListeners()
p.AddErrorListener(&testErrorListener{t: t})

v := NewFormatVisitor(stream)
out, ok := v.visitRule(p.Statement()).(string)
if !ok {
t.Errorf("Unexpected result parsing apex")
}
out = removeExtraCommentIndentation(out)
if out != tt.output {
t.Errorf("unexpected format. expected:\n%q\ngot:\n%q\n", tt.output, out)
}
})
}

}

func TestTrailingComments(t *testing.T) {
if testing.Verbose() {
log.SetLevel(log.DebugLevel)

log.SetFormatter(&log.TextFormatter{
DisableQuote: true,
})
}
tests :=
[]struct {
Expand Down Expand Up @@ -176,26 +184,44 @@ private class T1Exception {}`,
// Blank line before comment
private Integer i;
}`,
},
{
`class TestClass {
public static void go() {
// First Comment
// Second Comment
go();}}`,
`class TestClass {
public static void go() {
// First Comment
// Second Comment
go();
}
}`,
},
}
for _, tt := range tests {
input := antlr.NewInputStream(tt.input)
lexer := parser.NewApexLexer(input)
stream := antlr.NewCommonTokenStream(lexer, antlr.TokenDefaultChannel)

p := parser.NewApexParser(stream)
p.RemoveErrorListeners()
p.AddErrorListener(&testErrorListener{t: t})

v := NewFormatVisitor(stream)
out, ok := v.visitRule(p.CompilationUnit()).(string)
if !ok {
t.Errorf("Unexpected result parsing apex")
}
out = removeExtraCommentIndentation(out)
if out != tt.output {
t.Errorf("unexpected format. expected:\n%q\ngot:\n%q\n", tt.output, out)
}
for i, tt := range tests {
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
input := antlr.NewInputStream(tt.input)
lexer := parser.NewApexLexer(input)
stream := antlr.NewCommonTokenStream(lexer, antlr.TokenDefaultChannel)

p := parser.NewApexParser(stream)
p.RemoveErrorListeners()
p.AddErrorListener(&testErrorListener{t: t})

v := NewFormatVisitor(stream)
out, ok := v.visitRule(p.CompilationUnit()).(string)
if !ok {
t.Errorf("Unexpected result parsing apex")
}
out = removeExtraCommentIndentation(out)
if out != tt.output {
t.Errorf("unexpected format. expected:\n%q\ngot:\n%q\n", tt.output, out)
}
})
}
}
36 changes: 33 additions & 3 deletions formatter/formatter.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

"github.com/antlr4-go/antlr/v4"
"github.com/octoberswimmer/apexfmt/parser"
log "github.com/sirupsen/logrus"
)

type Formatter struct {
Expand Down Expand Up @@ -106,6 +107,8 @@ func (f *Formatter) Write() error {
return writeFile(f.filename, f.formatted)
}

// removeIndentationFromComment removes, from a single multi-line comment, the
// extra tabs that were introduced during formatting.
func removeIndentationFromComment(comment string) string {
// Find the position of the initial \uFFFA and the final \uFFFB
startIndex := strings.Index(comment, "\uFFFA")
Expand Down Expand Up @@ -139,17 +142,43 @@ func removeIndentationFromComment(comment string) string {
return unindented
}

// Comments are annotated in FormatVisitor.visitRule. We preserve whitespace
// within multi-line comments by removing the indentation added within the
// comment.
// removeExtraCommentIndentation cleans up the formatting of comments after the
// formatter has run.
//
// This could probably be improved by rethinking the approach. Preserving
// comments is tricky.
//
// The antlr lexer pulls comments into a separate token stream so we don't need
// to check for comments in every visit function. Instead, we look for
// comments, each represented as a single token, before the start of or after
// the end of the current parser node. Then we reinject the comments as we're
// visiting each node.
//
// The visitor functions don't know about the comments so they introduce
// whitespace around them when formatting and indenting the code. We need to
// ensure that the comments don't end up mangled. We wrap the comments in
// delimiters so we can easily identify the comments and clean up after
// the formatter runs. This code cleans up the whitespace and removes the comment
// delimiters.
func removeExtraCommentIndentation(input string) string {
log.Trace(fmt.Sprintf("ADJUSTING : %q", input))
// Remove extra grammar-specific newlines that were added without awareness of the injected newline-preserving comments
newlinePrefixedMultilineComment := regexp.MustCompile("[\n ]*(\t*\uFFFA)")
input = newlinePrefixedMultilineComment.ReplaceAllString(input, "$1")
log.Trace(fmt.Sprintf("ADJUSTED(1): %q", input))

// Remove extra grammar-specific spaces that were added without awareness of the injected newline-preserving comments
spacePaddedMultilineComment := regexp.MustCompile(`(` + "\uFFFB\n*\t*" + `) +`)
input = spacePaddedMultilineComment.ReplaceAllString(input, "$1")
log.Trace(fmt.Sprintf("ADJUSTED(2): %q", input))

// Remove extra indent-injected newlines
indentInjectedNewlines := regexp.MustCompile("\uFFFB\n+")
input = indentInjectedNewlines.ReplaceAllString(input, "\uFFFB\n")
log.Trace(fmt.Sprintf("ADJUSTED(3): %q", input))

input = strings.ReplaceAll(input, "\n\uFFFB\n", "\n\uFFFB")
log.Trace(fmt.Sprintf("ADJUSTED(4): %q", input))

newlinePrefixedInlineComment := regexp.MustCompile("\n\t*\uFFF9\n")
input = newlinePrefixedInlineComment.ReplaceAllString(input, "\uFFF9\n")
Expand All @@ -161,6 +190,7 @@ func removeExtraCommentIndentation(input string) string {
// Restore formatting of indented multi-line comments
multilineCommentPattern := regexp.MustCompile(`(?s)\t*` + "\uFFFA" + `.*?` + "\uFFFB")
unindented := multilineCommentPattern.ReplaceAllStringFunc(input, removeIndentationFromComment)
log.Trace(fmt.Sprintf("UNINDENTED : %q", input))

return unindented
}
Expand Down
56 changes: 41 additions & 15 deletions formatter/indent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,19 @@ package formatter
import (
"bufio"
"bytes"
"fmt"
"testing"

log "github.com/sirupsen/logrus"
)

func TestIndent(t *testing.T) {
if testing.Verbose() {
log.SetLevel(log.DebugLevel)
log.SetFormatter(&log.TextFormatter{
DisableQuote: true,
})
}
tests :=
[]struct {
input string
Expand Down Expand Up @@ -34,12 +43,18 @@ func TestIndent(t *testing.T) {
"public class B {\n\t\ufffa\n\t/**\n\t\t\t */\n\ufffb\tpublic X(Y client) {}\n}",
"\tpublic class B {\n\t\t\ufffa\n\t\t/**\n\t\t\t\t */\ufffb\n\t\tpublic X(Y client) {}\n\t}",
},
{
"\ufffa\n// First Comment\n\n\ufffb\ufffa// Second Comment\n\ufffbgo();",
"\t\ufffa\n\t// First Comment\n\ufffb\n\t\ufffa// Second Comment\n\ufffb\n\tgo();",
},
}
for _, tt := range tests {
out := indent(tt.input)
if out != tt.output {
t.Errorf("unexpected indent format. expected:\n%q\ngot:\n%q\n", tt.output, out)
}
for i, tt := range tests {
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
out := indent(tt.input)
if out != tt.output {
t.Errorf("unexpected indent format. expected:\n%q\ngot:\n%q\n", tt.output, out)
}
})
}
}

Expand Down Expand Up @@ -81,6 +96,12 @@ func TestRemoveIndentation(t *testing.T) {
}

func TestSplitLeadingFFFAOrFFFBOrNewline(t *testing.T) {
if testing.Verbose() {
log.SetLevel(log.DebugLevel)
log.SetFormatter(&log.TextFormatter{
DisableQuote: true,
})
}
testCases := []struct {
name string
input string
Expand Down Expand Up @@ -134,11 +155,10 @@ func TestSplitLeadingFFFAOrFFFBOrNewline(t *testing.T) {
},
{
name: "Delimiter with content on the same line",
input: "public class B {\n\t\ufffa some content\n}",
input: "public class B {\n\t\ufffa // some content\ufffb\n}",
expected: []string{
"public class B {",
"\t\ufffa",
" some content",
"\t\ufffa // some content\ufffb",
"}",
},
},
Expand Down Expand Up @@ -210,13 +230,11 @@ func TestSplitLeadingFFFAOrFFFBOrNewline(t *testing.T) {
},
},
{
name: "Delimiter in the middle of the line (should split)",
input: "public class B {\n\tpublic \ufffa X(Y client) {}\n}",
name: "Delimiter in the middle of the line",
input: "public class B {\n\tpublic \ufff9 /* inline comment */ \ufffb X(Y client) {}\n}",
expected: []string{
"public class B {",
"\tpublic ",
"\ufffa",
" X(Y client) {}",
"\tpublic \ufff9 /* inline comment */ \ufffb X(Y client) {}",
"}",
},
},
Expand Down Expand Up @@ -278,6 +296,14 @@ func TestSplitLeadingFFFAOrFFFBOrNewline(t *testing.T) {
"}",
},
},
{
name: "Include content after \\ufffa",
input: "\ufffa// Second Comment\n\ufffbgo();",
expected: []string{
"\ufffa// Second Comment\n\ufffb",
"go();",
},
},
}

for _, tc := range testCases {
Expand All @@ -294,8 +320,8 @@ func TestSplitLeadingFFFAOrFFFBOrNewline(t *testing.T) {
}
if len(tokens) != len(tc.expected) {
t.Errorf("Expected %d tokens, got %d", len(tc.expected), len(tokens))
t.Errorf("Expected tokens: %v", tc.expected)
t.Errorf("Got tokens: %v", tokens)
t.Errorf("Expected tokens: %+v", tc.expected)
t.Errorf("Got tokens: %+v", tokens)
return
}
for i, expected := range tc.expected {
Expand Down
Loading

0 comments on commit 9b55e31

Please sign in to comment.