From 856c2ae7768d8fb36cffdb434d7349eae5b888ae Mon Sep 17 00:00:00 2001
From: Federico Ficarelli
Date: Tue, 12 Nov 2019 18:00:58 +0100
Subject: [PATCH] Add remaining golden file tests

---
 cool.tm                                        |  4 +-
 lexer_test.go                                  | 70 +++++++++++--------
 testdata/escapednull.cool.lexer.gold.json      |  7 ++
 .../null_in_string.cl.cool.lexer.gold.json     |  7 ++
 ...followed_by_tokens.cl.cool.lexer.gold.json  | 17 +++++
 5 files changed, 75 insertions(+), 30 deletions(-)
 create mode 100644 testdata/escapednull.cool.lexer.gold.json
 create mode 100644 testdata/null_in_string.cl.cool.lexer.gold.json
 create mode 100644 testdata/null_in_string_followed_by_tokens.cl.cool.lexer.gold.json

diff --git a/cool.tm b/cool.tm
index 230a6f4..528abb3 100644
--- a/cool.tm
+++ b/cool.tm
@@ -74,8 +74,8 @@ invalid_token: /"({strRune}*(\\?\x00){strRune}*)+"/
 # Note: It's unclear from the language spec whether multiple unescaped '\n'
 # should produce a single invalid token or not. No golden files with
 # this case are available but 's19.test.cool' shows that a single '\n'
-# splits the invalid literal in two halves. Leaving the rule commented
-# out and looking for clarifications.
+# splits the invalid literal into two lexable halves. Leaving the rule
+# commented out while looking for clarifications.
 # invalid_token: /"({strRune}*([^\\]?\n){strRune}*)+"/ # <- This needs backtracking!
 
 StringLiteral: /"{strRune}*"/
diff --git a/lexer_test.go b/lexer_test.go
index 3bc4ec5..b68f95d 100644
--- a/lexer_test.go
+++ b/lexer_test.go
@@ -1,13 +1,12 @@
 package cool
 
 import (
-	"bytes"
 	"encoding/json"
 	"github.com/google/go-cmp/cmp"
 	"io/ioutil"
 	"log"
+	"path"
 	"testing"
-	"text/template"
 )
 
 type SourceToken struct {
@@ -19,7 +18,7 @@
 func TestLexerSnippets(t *testing.T) {
 	for _, tt := range testSnippets {
 		t.Run(tt.name, func(t *testing.T) {
-			got := scan(tt.source)
+			got := scanTerminals(tt.source)
 			if diff := cmp.Diff(tt.tokens, got); diff != "" {
 				t.Errorf("lex mismatch (-want +got):\n%s", diff)
 			}
@@ -27,27 +26,18 @@ func TestLexerSnippets(t *testing.T) {
 	}
 }
 
-func TestLexerFiles(t *testing.T) {
-	temp, err := template.New("golden").Parse("{{.}}.lexer.gold.json")
-	if err != nil {
-		panic(err)
-	}
+func TestLexerGoldFiles(t *testing.T) {
 	for _, sourceFileName := range testFiles {
-		t.Run(sourceFileName, func(t *testing.T) {
+		t.Run(path.Base(sourceFileName), func(t *testing.T) {
 			// Source
 			sourceBuf, err := ioutil.ReadFile(sourceFileName)
 			if err != nil {
 				log.Fatalln(err)
 			}
 			source := string(sourceBuf)
-			sourceTokens := scanSource(source)
+			sourceTokens := scanSourceTokens(source)
 			// Golden
-			var b bytes.Buffer
-			err = temp.Execute(&b, sourceFileName)
-			if err != nil {
-				log.Fatalln(err)
-			}
-			goldFileName := b.String()
+			goldFileName := sourceFileName + ".lexer.gold.json"
 			goldBuf, err := ioutil.ReadFile(goldFileName)
 			if err != nil {
 				log.Fatalln(err)
@@ -57,15 +47,29 @@
 			if err != nil {
 				log.Fatalln(err)
 			}
-			// Compare
-			if diff := cmp.Diff(goldTokens, sourceTokens); diff != "" {
-				t.Errorf("lex mismatch (-want +got):\n%s", diff)
-			}
+			t.Run("Terminals", func(t *testing.T) {
+				var goldTokenIds []Token
+				for _, tok := range goldTokens {
+					goldTokenIds = append(goldTokenIds, tok.Terminal)
+				}
+				var sourceTokenIds []Token
+				for _, tok := range sourceTokens {
+					sourceTokenIds = append(sourceTokenIds, tok.Terminal)
+				}
+				if diff := cmp.Diff(goldTokenIds, sourceTokenIds); diff != "" {
+					t.Errorf("lex mismatch (-want +got):\n%s", diff)
+				}
+			})
+			t.Run("Values", func(t *testing.T) {
+				if diff := cmp.Diff(goldTokens, sourceTokens); diff != "" {
+					t.Errorf("lex mismatch (-want +got):\n%s", diff)
+				}
+			})
 		})
 	}
 }
 
-func scanSource(source string) []SourceToken {
+func scanSourceTokens(source string) []SourceToken {
 	var lex Lexer
 	lex.Init(source)
 	var tokens []SourceToken
@@ -75,9 +79,9 @@
 	return tokens
 }
 
-func scan(source string) []Token {
+func scanTerminals(source string) []Token {
 	var tokens []Token
-	for _, t := range scanSource(source) {
+	for _, t := range scanSourceTokens(source) {
 		tokens = append(tokens, t.Terminal)
 	}
 	return tokens
@@ -100,6 +104,8 @@
 	{"Identifier", "object Type oBJECT", []Token{OBJECTID, TYPEID, OBJECTID}},
 	{"IntegerLiteral", "0 000 0000 01234567890", []Token{INTEGERLITERAL, INTEGERLITERAL, INTEGERLITERAL, INTEGERLITERAL}},
 	{"StringLiteral", "\"\" \" \" \" foo \"", []Token{STRINGLITERAL, STRINGLITERAL, STRINGLITERAL}},
+	{"StringLiteralEscapes", "\" \\a\\b\\\"\\c\\\"\\d\\\\\\\n \"", []Token{STRINGLITERAL}},
+	{"EmptyStringLiteral", "\"\"", []Token{STRINGLITERAL}},
 	{"Whitespace", " \t\t \f \v \r\r\r\n\n ", nil},
 	{"BoolLiteral", "true false tRUE fALSE True False", []Token{BOOLLITERAL, BOOLLITERAL, BOOLLITERAL, BOOLLITERAL, TYPEID, TYPEID}},
 	{"KeywordClass", "class CLASS Class cLASS", []Token{CLASS, CLASS, CLASS, CLASS}},
@@ -164,6 +170,16 @@
 	{"TokenAndInvalidRSub", "a ] a", []Token{OBJECTID, INVALID_TOKEN, OBJECTID}},
 	{"TokenAndInvalidBackslash", "a \\ a", []Token{OBJECTID, INVALID_TOKEN, OBJECTID}},
 	{"TokenAndInvalidPipe", "a | a", []Token{OBJECTID, INVALID_TOKEN, OBJECTID}},
+	{"InvalidNull", "\x00", []Token{INVALID_TOKEN}},
+	{"TokenAndInvalidNull", "a \x00 a\x00a", []Token{OBJECTID, INVALID_TOKEN, OBJECTID, INVALID_TOKEN, OBJECTID}},
+	{"OneNullInStringLiteral", "\"this is a string \x00 literal\"", []Token{INVALID_TOKEN}},
+	{"TwoNullInStringLiteral", "\"this is \x00 a string \x00 literal\"", []Token{INVALID_TOKEN}},
+	{"OneEscapedNullInStringLiteral", "\"this is an ill formed string \\\x00 literal\"", []Token{INVALID_TOKEN}},
+	{"TwoEscapedNullInStringLiteral", "\"this is \\\x00 an ill formed string \\\x00 literal\"", []Token{INVALID_TOKEN}},
+	// An unescaped '\n' splits a string literal into lexable chunks.
+	// It's unclear whether this is the right behaviour; the language
+	// spec gives no hints. See the 'StringLiteral' rule for details.
+ {"StringLiteralUnsecapedNewline", "\"a \n b\"", []Token{INVALID_TOKEN, OBJECTID, INVALID_TOKEN}}, } var testFiles = []string{ @@ -227,9 +243,7 @@ var testFiles = []string{ "testdata/objectid.test.cool", "testdata/palindrome.cool", "testdata/sort_list.cl.cool", + "testdata/escapednull.cool", + "testdata/null_in_string.cl.cool", + "testdata/null_in_string_followed_by_tokens.cl.cool", } - -// TODO -// FAIL {"testdata/escapednull.cool", []Token{INVALID_TOKEN}}, -// FAIL {"testdata/null_in_string.cl.cool", []Token{INVALID_TOKEN}}, -// FAIL {"testdata/null_in_string_followed_by_tokens.cl.cool", []Token{INVALID_TOKEN, OBJECTID, PLUS}}, diff --git a/testdata/escapednull.cool.lexer.gold.json b/testdata/escapednull.cool.lexer.gold.json new file mode 100644 index 0000000..0bab3fa --- /dev/null +++ b/testdata/escapednull.cool.lexer.gold.json @@ -0,0 +1,7 @@ +[ + { + "line": 1, + "token": 1, + "source": "\"This contains an escaped null character \\\u0000\"" + } +] diff --git a/testdata/null_in_string.cl.cool.lexer.gold.json b/testdata/null_in_string.cl.cool.lexer.gold.json new file mode 100644 index 0000000..6283e4e --- /dev/null +++ b/testdata/null_in_string.cl.cool.lexer.gold.json @@ -0,0 +1,7 @@ +[ + { + "line": 2, + "token": 1, + "source": "\"null character is here -\u003e\u0000\u003c-\"" + } +] diff --git a/testdata/null_in_string_followed_by_tokens.cl.cool.lexer.gold.json b/testdata/null_in_string_followed_by_tokens.cl.cool.lexer.gold.json new file mode 100644 index 0000000..4705460 --- /dev/null +++ b/testdata/null_in_string_followed_by_tokens.cl.cool.lexer.gold.json @@ -0,0 +1,17 @@ +[ + { + "line": 2, + "token": 1, + "source": "\"null character is here -\u003e\u0000\u003c-\"" + }, + { + "line": 2, + "token": 7, + "source": "a" + }, + { + "line": 2, + "token": 40, + "source": "+" + } +]