Skip to content

Commit

Permalink
Merge pull request #11 from nilium/allow-braces-in-barewords
Browse files Browse the repository at this point in the history
Allow braces & comment runes in barewords
  • Loading branch information
nilium authored Jul 10, 2018
2 parents 807334d + f3b7cf5 commit a220d8b
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 40 deletions.
69 changes: 47 additions & 22 deletions lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -406,9 +406,23 @@ func isBarewordRune(r rune) bool {
!isBarewordForbidden(r)
}

// isBarewordTransition returns true if r is valid inside of a token that is not a bareword but
// would become one by consuming r.
func isBarewordTransition(r rune) bool {
return unicode.In(r, barewordTables...) &&
!isStatementSep(r)
}

// isBarewordForbidden returns true if r is one of the characters that may not appear in a bareword.
func isBarewordForbidden(r rune) bool {
return isStatementSep(r) || unicode.IsControl(r)
return isWordSep(r) || unicode.IsControl(r)
}

// isWordSep reports whether r is whitespace, rSentinel (end of statement), rDoubleQuote (start of
// a quoted string), or rBackQuote (start of a raw string).
func isWordSep(r rune) bool {
	switch r {
	case rSentinel, rDoubleQuote, rBackQuote:
		// End statement, quoted string, raw string.
		return true
	}
	return unicode.IsSpace(r)
}

func isStatementSep(r rune) bool {
Expand All @@ -419,8 +433,7 @@ func isStatementSep(r rune) bool {
r == rBracketOpen || // Open array
r == rBracketClose || // Close array
r == rDoubleQuote || // Quoted string
r == rBackQuote || // Raw string
r == rComment // Comment
r == rBackQuote // Raw string
}

func isLongIntervalInitial(r rune) bool {
Expand Down Expand Up @@ -561,8 +574,20 @@ func (l *Lexer) lexSegment(r rune) (Token, consumerFunc, error) {

func (l *Lexer) lexWordTail(next consumerFunc) consumerFunc {
var wordConsumer consumerFunc
var braces int
wordConsumer = func(r rune) (Token, consumerFunc, error) {
if isBarewordRune(r) {
switch {
case r == rCurlOpen || r == rBracketOpen:
braces++
l.buffer(r, r)
return noToken, wordConsumer, nil
case (r == rCurlClose || r == rBracketClose):
if braces <= 0 {
break
}
braces--
fallthrough
case isBarewordRune(r):
l.buffer(r, r)
return noToken, wordConsumer, nil
}
Expand Down Expand Up @@ -603,7 +628,7 @@ func (l *Lexer) lexSignedNumber(r rune) (Token, consumerFunc, error) {
case isStatementSep(r) || r == eof:
l.unread()
return l.lexBecomeWord(-1)
case isBarewordRune(r):
case isBarewordTransition(r):
return l.lexBecomeWord(r)
}
return noToken, nil, fmt.Errorf("unexpected character %q: expected number after sign", r)
Expand All @@ -629,7 +654,7 @@ func (l *Lexer) lexOctalNumber(r rune) (Token, consumerFunc, error) {
l.unread()
tok, err := l.valueToken(TOctal, parseBaseInt(8))
return tok, l.lexSegment, err
case isBarewordRune(r):
case isBarewordTransition(r):
return l.lexBecomeWord(r)
}
return noToken, nil, fmt.Errorf("unexpected character %q: expected octal digit or separator", r)
Expand All @@ -656,7 +681,7 @@ func (l *Lexer) lexHexNum(r rune) (Token, consumerFunc, error) {
l.unread()
tok, err := l.valueToken(THex, parseBaseInt(16))
return tok, l.lexSegment, err
case isBarewordRune(r):
case isBarewordTransition(r):
return l.lexBecomeWord(r)
}
return noToken, nil, fmt.Errorf("unexpected character %q: expected hex digit or separator", r)
Expand All @@ -671,7 +696,7 @@ func (l *Lexer) lexBinNum(r rune) (Token, consumerFunc, error) {
l.unread()
tok, err := l.valueToken(TBinary, parseBaseInt(2))
return tok, l.lexSegment, err
case isBarewordRune(r):
case isBarewordTransition(r):
return l.lexBecomeWord(r)
}
return noToken, nil, fmt.Errorf("unexpected character %q: expected binary digit or separator", r)
Expand All @@ -695,7 +720,7 @@ func (l *Lexer) lexRationalDenomInitial(r rune) (Token, consumerFunc, error) {
case isStatementSep(r) || r == eof:
l.unread()
return l.lexBecomeWord(-1)
case isBarewordRune(r):
case isBarewordTransition(r):
return l.lexBecomeWord(r)
}
return noToken, nil, fmt.Errorf("unexpected character %q: expected positive number", r)
Expand All @@ -706,7 +731,7 @@ func (l *Lexer) lexRationalDenomTail(r rune) (Token, consumerFunc, error) {
case isDecimal(r):
l.buffer(r, r)
return noToken, l.lexRationalDenomTail, nil
case isBarewordRune(r):
case isBarewordTransition(r):
return l.lexBecomeWord(r)
case isStatementSep(r) || r == eof:
l.unread()
Expand Down Expand Up @@ -757,7 +782,7 @@ func (l *Lexer) lexFloatExponentUnsigned(r rune) (Token, consumerFunc, error) {
case r == eof || isStatementSep(r):
l.unread()
return l.lexBecomeWord(-1)
case isBarewordRune(r):
case isBarewordTransition(r):
return l.lexBecomeWord(r)
}
return noToken, nil, fmt.Errorf("unexpected character %q: expected sign or digit", r)
Expand All @@ -776,7 +801,7 @@ func (l *Lexer) lexFloatExponentSignedTail(r rune) (Token, consumerFunc, error)
l.unread()
tok, err := l.valueToken(TFloat, parseBigFloat(l.Precision))
return tok, l.lexSegment, err
case isBarewordRune(r):
case isBarewordTransition(r):
return l.lexBecomeWord(r)
}
return noToken, nil, fmt.Errorf("unexpected character %q: expected digit or separator", r)
Expand Down Expand Up @@ -816,7 +841,7 @@ func (l *Lexer) lexFloatEnd(r rune) (Token, consumerFunc, error) {
l.unread()
tok, err := l.valueToken(TFloat, parseBigFloat(l.Precision))
return tok, l.lexSegment, err
case isBarewordRune(r):
case isBarewordTransition(r):
return l.lexBecomeWord(r)
}
return noToken, nil, fmt.Errorf("unexpected character %q: expected separator", r)
Expand All @@ -835,7 +860,7 @@ func (l *Lexer) lexFloatPointInitial(r rune) (Token, consumerFunc, error) {
case r == eof || isStatementSep(r):
l.unread()
return l.lexBecomeWord(-1)
case isBarewordRune(r):
case isBarewordTransition(r):
return l.lexBecomeWord(r)
}
return l.lexFloatPoint(r)
Expand Down Expand Up @@ -865,7 +890,7 @@ func (l *Lexer) lexFloatPoint(r rune) (Token, consumerFunc, error) {
l.unread()
tok, err := l.valueToken(TFloat, parseBigFloat(l.Precision))
return tok, l.lexSegment, err
case isBarewordRune(r):
case isBarewordTransition(r):
return l.lexBecomeWord(r)
}
return noToken, nil, fmt.Errorf("unexpected character %q: expected digit, exponent, or separator", r)
Expand Down Expand Up @@ -909,7 +934,7 @@ func (l *Lexer) lexIntervalUnitMaybeLong(r rune) (Token, consumerFunc, error) {
return noToken, l.lexIntervalInitial, nil
case isStatementSep(r) || r == eof:
return l.lexIntervalInitial(r)
case isBarewordRune(r):
case isBarewordTransition(r):
return l.lexBecomeWord(r)
}
return noToken, nil, fmt.Errorf("unexpected character %q: expected digit or 's'", r)
Expand All @@ -931,7 +956,7 @@ func (l *Lexer) lexIntervalUnitLong(r rune) (Token, consumerFunc, error) {
case isStatementSep(r) || r == eof:
l.unread()
return l.lexBecomeWord(-1)
case isBarewordRune(r):
case isBarewordTransition(r):
return l.lexBecomeWord(r)
}
return noToken, nil, fmt.Errorf("unexpected character %q: expected 's'", r)
Expand Down Expand Up @@ -972,7 +997,7 @@ func (l *Lexer) lexIntervalFloatTail(r rune) (Token, consumerFunc, error) {
case isStatementSep(r) || r == eof:
l.unread()
return l.lexBecomeWord(-1)
case isBarewordRune(r):
case isBarewordTransition(r):
return l.lexBecomeWord(r)
}
return noToken, l.lexIntervalFloatTail, fmt.Errorf("unexpected character %s: expected digit or interval unit", TDuration)
Expand Down Expand Up @@ -1013,7 +1038,7 @@ func (l *Lexer) lexInterval(r rune) (Token, consumerFunc, error) {
case isStatementSep(r) || r == eof:
l.unread()
return l.lexBecomeWord(-1)
case isBarewordRune(r):
case isBarewordTransition(r):
return l.lexBecomeWord(r)
}
return noToken, nil, fmt.Errorf("unexpected character %q: expected number or interval unit", r)
Expand Down Expand Up @@ -1058,7 +1083,7 @@ func (l *Lexer) lexZero(r rune) (Token, consumerFunc, error) {
case r == 'E' || r == 'e':
l.buffer(r, r)
return noToken, l.lexFloatExponentUnsigned, nil
case isBarewordRune(r):
case isBarewordTransition(r):
return l.lexBecomeWord(r)
}
return noToken, nil, fmt.Errorf("unexpected character %q: expected b, x, X, octal, duration unit, or separator", r)
Expand Down Expand Up @@ -1117,7 +1142,7 @@ func (l *Lexer) lexNonZero(r rune) (Token, consumerFunc, error) {
return noToken, l.lexFloatExponentUnsigned, nil
}

if isBarewordRune(r) {
if isBarewordTransition(r) {
return l.lexBecomeWord(r)
}

Expand Down Expand Up @@ -1150,7 +1175,7 @@ func (l *Lexer) lexBaseNumber(neg bool, base int) (consumer consumerFunc) {
n++
l.buffer(r, r)
return noToken, consumer, nil
} else if isBarewordRune(r) {
} else if isBarewordTransition(r) {
return l.lexBecomeWord(r)
} else if n == 0 && (isStatementSep(r) || r == eof) {
l.unread()
Expand Down
43 changes: 26 additions & 17 deletions lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,10 +283,9 @@ func TestBooleans(t *testing.T) {
_ws, {Token: Token{Kind: TWord, Raw: []byte("true"), Value: "true"}},
_ws, {Token: Token{Kind: TWord, Raw: []byte("Yes"), Value: "Yes"}},
_ws, {Token: Token{Kind: TWord, Raw: []byte("FALSE"), Value: "FALSE"}},
_curlopen,
_curlclose,
_ws, _curlopen, _curlclose,
_eof,
}.Run(t, "TRUE true Yes FALSE{}")
}.Run(t, "TRUE true Yes FALSE {}")
}

func TestStatement(t *testing.T) {
Expand All @@ -297,26 +296,35 @@ func TestStatement(t *testing.T) {
_ws, {Token: Token{Kind: TOctal, Raw: []byte("+0600"), Value: big.NewInt(0600)}},
_ws, {Token: Token{Kind: THex, Raw: []byte("-0xf"), Value: big.NewInt(-15)}},
_ws, {Token: Token{Kind: THex, Raw: []byte("0x12f"), Value: big.NewInt(303)}},
_ws, wordCase("${FOO:-${BAZ:-Default}}"),
_semicolon,
_ws, {Token: Token{Kind: TWord, Raw: []byte("stmt/2"), Value: "stmt/2"}},
_semicolon,
_ws, {Token: Token{Kind: TWord, Raw: []byte("sect"), Value: "sect"}},
_curlopen, _curlclose,
_ws, {Token: Token{Kind: TWord, Raw: []byte("a"), Value: "a"}},
_semicolon,
_ws, {Token: Token{Kind: TWord, Raw: []byte("b"), Value: "b"}},
_curlopen, _curlclose,
_ws, {Token: Token{Kind: TWord, Raw: []byte("c"), Value: "c"}},
_comment,
_ws, wordCase("stmt/2"), _semicolon,
_ws, wordCase("stmt{}"), _semicolon,
_ws, wordCase("invalid"), _curlclose,
_ws, wordCase("sect"), _ws, _curlopen, _curlclose,
_ws, wordCase("a"), _semicolon,
_ws, wordCase("b{}"),
_ws, wordCase("c'foo"),
_ws, _comment,
_ws, wordCase("#[foo]"),
_ws, wordCase("$[foo]"),
_ws, wordCase("${foo}"),
_ws, wordCase("${{foo}}"),
_ws, wordCase("${[foo}]"),
_ws, wordCase("${foo}"), _curlclose,
_ws, wordCase("${foo]"), _bracketclose,
_ws, _semicolon, _semicolon,
_ws, _eof,
}.Run(t, `
stmt -1234 +0600 -0xf 0x12f;
stmt -1234 +0600 -0xf 0x12f ${FOO:-${BAZ:-Default}};
stmt/2;
sect{}
stmt{};
invalid}
sect {}
a;
b{}
c'foo
c'foo 'foo
#[foo] $[foo] ${foo} ${{foo}} ${[foo}] ${foo}} ${foo]]
;;
`)
}
Expand Down Expand Up @@ -726,6 +734,7 @@ func TestFloats(t *testing.T) {
{Token: Token{Kind: TWord, Value: "k"}},
_ws, dec("0.5"),
_curlclose,
_ws, dec("0e123"),
_ws, dec("0.0e0"),
_ws, dec("0.0E0"),
_ws, dec("1.2345"),
Expand All @@ -739,7 +748,7 @@ func TestFloats(t *testing.T) {
-1.2345 -12345e-4 -1.2345e4 -1.2345e+4
+0.0 +0.5 +0.0e0 +0.0E0
+1.2345 +12345E-4 +1.2345E4 +1.2345E+4
[0.0] #{k 0.5} 0.0e0 0.0E0
[0.0] #{k 0.5} 0e123 0.0e0 0.0E0
1.2345 12345e-4 1.2345e4 1.2345e+4
;`)

Expand Down
2 changes: 1 addition & 1 deletion parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ func TestParseAST(t *testing.T) {
},
{
Name: "MinimalSpace",
Src: `sect[]#{}{stmt #{k[2]"p"#{}}true[false];}`,
Src: `sect []#{}{stmt #{k [2]"p"#{}}true [false];}`,
Doc: doc().section("sect", mkexprs(), mkmap()).
statement("stmt", mkmap("k", mkexprs(2), "p", mkmap()), true, mkexprs(false)).
Doc(),
Expand Down

0 comments on commit a220d8b

Please sign in to comment.