Skip to content

Commit

Permalink
Added the `caseless` keyword for case-insensitive matching
Browse files Browse the repository at this point in the history
  • Loading branch information
jmeaster30 committed Apr 15, 2023
1 parent e0234cc commit fb38ff2
Show file tree
Hide file tree
Showing 10 changed files with 71 additions and 15 deletions.
2 changes: 1 addition & 1 deletion libvore-syntax-highlighter/syntaxes/vore.tmLanguage.json
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
"charClass": {
"patterns": [{
"name": "entity.name.class.vore",
"match": "\\b(any|whitespace|digit|upper|lower|letter|line|file|word|start|end|whole|begin|if|then|else|loop)\\b"
"match": "\\b(any|whitespace|digit|upper|lower|letter|line|file|word|start|end|whole|begin|if|then|else|loop|caseless)\\b"
}]
},
"variable": {
Expand Down
5 changes: 3 additions & 2 deletions libvore/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,9 @@ func (r AstRange) print() {
}

type AstString struct {
not bool
value string
not bool
value string
caseless bool
}

// isLiteral is an empty marker method with no behavior of its own.
// NOTE(review): presumably it exists so AstString satisfies the AstLiteral
// interface — confirm against the interface definition.
func (s AstString) isLiteral() {}
Expand Down
7 changes: 4 additions & 3 deletions libvore/bytecode.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,16 +191,17 @@ type ReplaceInstruction interface {
}

type MatchLiteral struct {
not bool
toFind string
not bool
toFind string
caseless bool
}

// adjust returns the instruction unchanged: neither offset nor state is
// consulted for a MatchLiteral.
func (i MatchLiteral) adjust(offset int, state *GenState) SearchInstruction {
return i
}
func (i MatchLiteral) execute(current_state *SearchEngineState) *SearchEngineState {
next_state := current_state.Copy()
next_state.MATCH(i.toFind, i.not)
next_state.MATCH(i.toFind, i.not, i.caseless)
return next_state
}

Expand Down
17 changes: 13 additions & 4 deletions libvore/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package libvore

import (
"strconv"
"strings"
)

type Status int
Expand Down Expand Up @@ -434,12 +435,20 @@ func (es *SearchEngineState) MATCHOPTIONS(options []string, not bool) {
}
}

func (es *SearchEngineState) MATCH(value string, not bool) {
// compare reports whether a and b are equal. When caseless is true the
// comparison uses strings.EqualFold; otherwise it is an exact comparison.
func compare(a string, b string, caseless bool) bool {
	if !caseless {
		return a == b
	}
	return strings.EqualFold(a, b)
}

func (es *SearchEngineState) MATCH(value string, not bool, caseless bool) {
comp := es.READ(len(value))
if !not && value == comp {
if !not && compare(value, comp, caseless) {
es.CONSUME(len(value))
es.NEXT()
} else if not && value != comp {
} else if not && !compare(value, comp, caseless) {
es.CONSUME(len(value))
es.NEXT()
} else {
Expand All @@ -455,7 +464,7 @@ func (es *SearchEngineState) MATCHVAR(name string) {
// TODO add syntax for indexing hash maps but also I want something a bit better than just failing here
es.BACKTRACK()
} else {
es.MATCH(value.String().Value, false)
es.MATCH(value.String().Value, false, false)
}
}

Expand Down
5 changes: 3 additions & 2 deletions libvore/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -406,8 +406,9 @@ func (l *AstRange) generate(offset int, state *GenState) ([]SearchInstruction, e

func (l *AstString) generate(offset int, state *GenState) ([]SearchInstruction, error) {
result := MatchLiteral{
toFind: l.value,
not: l.not,
toFind: l.value,
not: l.not,
caseless: l.caseless,
}
return []SearchInstruction{result}, nil
}
Expand Down
5 changes: 5 additions & 0 deletions libvore/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ const (
BEGIN

// keywords
CASELESS
NOT
AT
LEAST
Expand Down Expand Up @@ -254,6 +255,8 @@ func (t TokenType) PP() string {
return "TRUE"
case FALSE:
return "FALSE"
case CASELESS:
return "CASELESS"
default:
panic("UNKNOWN TOKEN TYPE")
}
Expand Down Expand Up @@ -658,6 +661,8 @@ func (s *Lexer) getNextToken() (*Token, error) {
token.TokenType = FALSE
case "whole":
token.TokenType = WHOLE
case "caseless":
token.TokenType = CASELESS
}
case SWHITESPACE:
token.TokenType = WS
Expand Down
1 change: 1 addition & 0 deletions libvore/lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,4 +199,5 @@ func TestTokenTypePP(t *testing.T) {
ppMatch(t, TRUE, "TRUE")
ppMatch(t, FALSE, "FALSE")
ppMatch(t, WHOLE, "WHOLE")
ppMatch(t, CASELESS, "CASELESS")
}
25 changes: 23 additions & 2 deletions libvore/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ func parse_expression(tokens []*Token, token_index int) (AstExpression, int, err
current_token.TokenType == UPPER || current_token.TokenType == LOWER ||
current_token.TokenType == LETTER || current_token.TokenType == LINE ||
current_token.TokenType == FILE || current_token.TokenType == WORD ||
current_token.TokenType == WHOLE {
current_token.TokenType == WHOLE || current_token.TokenType == CASELESS {
return parse_primary_or_dec(tokens, token_index)
}
return nil, token_index, NewParseError(*current_token, "Unexpected token. Expected 'at', 'between', 'exactly', 'maybe', 'in', '<string>', '<identifier>', or a character class ")
Expand Down Expand Up @@ -617,6 +617,8 @@ func parse_listable(tokens []*Token, token_index int) (AstListable, int, error)
}
return &r, new_index, nil

} else if current_token.TokenType == CASELESS {
return parse_caseless(tokens, token_index)
} else if isListableClass(current_token.TokenType) {
return parse_character_class(tokens, token_index, false)
}
Expand All @@ -627,6 +629,8 @@ func parse_literal(tokens []*Token, token_index int) (AstLiteral, int, error) {
current_token := tokens[token_index]
if current_token.TokenType == STRING {
return parse_string(tokens, token_index, false)
} else if current_token.TokenType == CASELESS {
return parse_caseless(tokens, token_index)
} else if current_token.TokenType == IDENTIFIER {
return parse_variable(tokens, token_index)
} else if current_token.TokenType == OPENPAREN {
Expand Down Expand Up @@ -719,10 +723,27 @@ func parse_atom(tokens []*Token, token_index int) (AstAtom, int, error) {
current_token := tokens[token_index]
if current_token.TokenType == STRING {
return parse_string(tokens, token_index, false)
} else if current_token.TokenType == CASELESS {
return parse_caseless(tokens, token_index)
} else if current_token.TokenType == IDENTIFIER {
return parse_variable(tokens, token_index)
}
return nil, token_index, NewParseError(*current_token, "Unexpected token. Expected '<string>' or '<identifier>'.")
return nil, token_index, NewParseError(*current_token, "Unexpected token. Expected 'caseless', '<string>', or '<identifier>'.")
}

// parse_caseless parses the string literal that follows a 'caseless' keyword.
// token_index is the position of the 'caseless' token itself. On success it
// returns an AstString with caseless set and the index just past the string
// token; if the next token after the skipped ones is not a STRING it returns
// a parse error pointing at that token.
func parse_caseless(tokens []*Token, token_index int) (*AstString, int, error) {
	// Step over the keyword, then over whatever consumeIgnoreableTokens skips.
	string_index := consumeIgnoreableTokens(tokens, token_index+1)
	candidate := tokens[string_index]
	if candidate.TokenType != STRING {
		return nil, string_index, NewParseError(*candidate, "Unexpected token. Expected <string> after the 'caseless' keyword.")
	}

	return &AstString{
		not:      false,
		value:    candidate.Lexeme,
		caseless: true,
	}, string_index + 1, nil
}

func parse_string(tokens []*Token, token_index int, not bool) (*AstString, int, error) {
Expand Down
2 changes: 1 addition & 1 deletion libvore/vore_ast_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import "testing"
func TestBigTest(t *testing.T) {
vore, err := Compile(`
find all "yeah"
find skip 1 take 1 between 2 and 3 ("hello" or "world") in 'a', 'b', 'c' to 'f' at least 6 "!" at most 9 ":)"
find skip 1 take 1 between 2 and 3 (caseless "hello" or "world") in 'a', 'b', 'c' to 'f' at least 6 "!" at most 9 ":)"
replace all "helloworld" any {whitespace digit} = test upper = yeah yeah (lower letter (line start) file start line end) file end with "wow!!"
set a to pattern "okay"
`)
Expand Down
17 changes: 17 additions & 0 deletions libvore/vore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -364,3 +364,20 @@ x, y, z`)
}},
})
}

// TestCaseless compiles a `find all caseless 'test'` query, runs it over four
// lines that spell "test" in different casings, and expects one match per line.
// NOTE(review): the first field of each TestMatch (13, 30, 47, 64) looks like
// the match offset into the input, which would depend on the exact whitespace
// inside the raw string below — confirm before reformatting it.
func TestCaseless(t *testing.T) {
vore, err := Compile("find all caseless 'test'")
checkNoError(t, err)
results := vore.Run(`
this is a test
this is a TEST
this is a Test
this is a tEsT
`)
// Each expected match preserves the casing found in the input text.
matches(t, results, []TestMatch{
{13, "test", None[string](), []TestVar{}},
{30, "TEST", None[string](), []TestVar{}},
{47, "Test", None[string](), []TestVar{}},
{64, "tEsT", None[string](), []TestVar{}},
})
}

0 comments on commit fb38ff2

Please sign in to comment.