Skip to content

Commit

Permalink
Handle undefined string identifiers. (VirusTotal#54)
Browse files Browse the repository at this point in the history
When parsing rules, gyp does not complain about undefined string identifiers.

For example, it does not error when parsing this rule:

rule a { condition: $x }

With these changes it now walks the condition AST and makes sure all string
references are valid, including wildcard expansion.

While I was here I uncovered a slight error in the Children() method of the Of
node: it always returned Strings as a child, even when the node had no strings
(Strings was nil). This is now fixed: Rules and Strings are only returned when
they are non-nil.

I also discovered some existing tests that were broken by this change (they
used $foo4 without defining it), so I "fixed" those too.
  • Loading branch information
wxsBSD authored Apr 29, 2022
1 parent d97259d commit 6b198f3
Show file tree
Hide file tree
Showing 6 changed files with 669 additions and 169 deletions.
9 changes: 6 additions & 3 deletions ast/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -690,10 +690,15 @@ func (f *ForOf) Children() []Node {

// Children returns the node's child nodes.
//
// The quantifier is always the first child. An Of node can refer exclusively
// to rules or exclusively to strings, so Rules and Strings are appended only
// when they are non-nil, in that order.
func (o *Of) Children() []Node {
	nodes := []Node{o.Quantifier}
	if o.Rules != nil {
		nodes = append(nodes, o.Rules)
	}
	if o.Strings != nil {
		nodes = append(nodes, o.Strings)
	}
	return nodes
}

Expand Down Expand Up @@ -1233,5 +1238,3 @@ func (o *Operation) AsProto() *pb.Expression {
}
return expr
}


1 change: 1 addition & 0 deletions error/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ const (
UnevenNumberOfDigitsError
InvalidAsciiError
InvalidUTF8Error
UndefinedStringIdentifierError
)

type Error struct {
Expand Down
5 changes: 5 additions & 0 deletions parser/adapter.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ func Parse(input io.Reader) (rs *ast.RuleSet, err error) {
Imports: make([]string, 0),
Rules: make([]*ast.Rule, 0),
},
// Used to collect the strings as they are parsed on a per-rule basis.
// When the condition is parsed this is used as a lookup table to check
// for undefined strings.
strings: make(map[string]bool),
}
lexer.scanner.In = input
lexer.scanner.Out = ioutil.Discard
Expand All @@ -51,6 +55,7 @@ type lexer struct {
scanner Scanner
err gyperror.Error
ruleSet *ast.RuleSet
strings map[string]bool
}

// Lex provides the interface expected by the goyacc parser. This function is
Expand Down
155 changes: 142 additions & 13 deletions parser/grammar.y
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,9 @@ rule
{
$<rule>4.Condition = $10
$$ = $<rule>4
// Clear the strings map for the next rule being parsed.
asLexer(yrlex).strings = make(map[string]bool)
}
;
Expand Down Expand Up @@ -470,10 +473,14 @@ meta_declaration
string_declarations
    : string_declaration
      {
        // Record the identifier in the lexer's per-rule table so that
        // references found while parsing the condition can be looked up.
        asLexer(yrlex).strings[$1.GetIdentifier()] = true
        $$ = []ast.String{$1}
      }
    | string_declarations string_declaration
      {
        // Same bookkeeping for every further declaration in the list.
        asLexer(yrlex).strings[$2.GetIdentifier()] = true
        $$ = append($1, $2)
      }
    ;
Expand Down Expand Up @@ -829,21 +836,51 @@ expression
}
| _STRING_IDENTIFIER_
  {
    // Drop the "$" sigil; the remainder is the key used in lexer.strings.
    identifier := strings.TrimPrefix($1, "$")
    // Exclude anonymous ($) strings: they carry no name to look up.
    if identifier != "" {
      lexer := asLexer(yrlex)
      if _, ok := lexer.strings[identifier]; !ok {
        return lexer.setError(
          gyperror.UndefinedStringIdentifierError,
          `undefined string identifier: %s`, $1)
      }
    }
    $$ = &ast.StringIdentifier{
      Identifier: identifier,
    }
  }
| _STRING_IDENTIFIER_ _AT_ primary_expression
  {
    // Drop the "$" sigil; the remainder is the key used in lexer.strings.
    identifier := strings.TrimPrefix($1, "$")
    // Exclude anonymous ($) strings: they carry no name to look up.
    if identifier != "" {
      lexer := asLexer(yrlex)
      if _, ok := lexer.strings[identifier]; !ok {
        return lexer.setError(
          gyperror.UndefinedStringIdentifierError,
          `undefined string identifier: %s`, $1)
      }
    }
    $$ = &ast.StringIdentifier{
      Identifier: identifier,
      At: $3,
    }
  }
| _STRING_IDENTIFIER_ _IN_ range
  {
    // Drop the "$" sigil; the remainder is the key used in lexer.strings.
    identifier := strings.TrimPrefix($1, "$")
    // Exclude anonymous ($) strings: they carry no name to look up.
    if identifier != "" {
      lexer := asLexer(yrlex)
      if _, ok := lexer.strings[identifier]; !ok {
        return lexer.setError(
          gyperror.UndefinedStringIdentifierError,
          `undefined string identifier: %s`, $1)
      }
    }
    $$ = &ast.StringIdentifier{
      Identifier: identifier,
      In: $3,
    }
  }
Expand Down Expand Up @@ -1011,6 +1048,12 @@ string_set
}
| _THEM_
  {
    // Reject "them" when no strings have been recorded for the current rule.
    if lx := asLexer(yrlex); len(lx.strings) == 0 {
      return lx.setError(
        gyperror.UndefinedStringIdentifierError,
        `undefined string identifier: %s`, ast.KeywordThem)
    }
    $$ = ast.KeywordThem
  }
;
Expand All @@ -1031,16 +1074,48 @@ string_enumeration
string_enumeration_item
    : _STRING_IDENTIFIER_
      {
        identifier := strings.TrimPrefix($1, "$")
        lexer := asLexer(yrlex)
        // Anonymous strings ($) in string enumerations are an error, as is
        // any identifier that was not recorded for the current rule.
        if _, ok := lexer.strings[identifier]; !ok || identifier == "" {
          return lexer.setError(
            gyperror.UndefinedStringIdentifierError,
            `undefined string identifier: %s`, $1)
        }
        $$ = &ast.StringIdentifier{
          Identifier: identifier,
        }
      }
    | _STRING_IDENTIFIER_WITH_WILDCARD_
      {
        // Strip the trailing "*" and the leading "$" to obtain the prefix to
        // match. A token made up of only those two characters leaves an
        // empty prefix, which every recorded string matches.
        prefix := strings.TrimPrefix(strings.TrimSuffix($1, "*"), "$")
        lexer := asLexer(yrlex)
        // There must be at least one recorded string that matches the
        // wildcard. When no strings were recorded the loop body never runs,
        // so that case is rejected here as well.
        match := false
        for s := range lexer.strings {
          if strings.HasPrefix(s, prefix) {
            match = true
            break
          }
        }
        if !match {
          return lexer.setError(
            gyperror.UndefinedStringIdentifierError,
            `undefined string identifier: %s`, $1)
        }
        // Can't use "prefix" here as that has the asterisk stripped already;
        // the AST node keeps the wildcard.
        $$ = &ast.StringIdentifier{
          Identifier: strings.TrimPrefix($1, "$"),
        }
      }
    ;


Expand Down Expand Up @@ -1158,39 +1233,93 @@ primary_expression
}
| _STRING_COUNT_ _IN_ range
  {
    // Drop the "#" sigil; the remainder is the key used in lexer.strings.
    identifier := strings.TrimPrefix($1, "#")
    // A bare "#" carries no name, so there is nothing to look up.
    if identifier != "" {
      lexer := asLexer(yrlex)
      if _, ok := lexer.strings[identifier]; !ok {
        return lexer.setError(
          gyperror.UndefinedStringIdentifierError,
          `undefined string identifier: %s`, $1)
      }
    }
    $$ = &ast.StringCount{
      Identifier: identifier,
      In: $3,
    }
  }
| _STRING_COUNT_
  {
    // Drop the "#" sigil; the remainder is the key used in lexer.strings.
    identifier := strings.TrimPrefix($1, "#")
    // A bare "#" carries no name, so there is nothing to look up.
    if identifier != "" {
      lexer := asLexer(yrlex)
      if _, ok := lexer.strings[identifier]; !ok {
        return lexer.setError(
          gyperror.UndefinedStringIdentifierError,
          `undefined string identifier: %s`, $1)
      }
    }
    $$ = &ast.StringCount{
      Identifier: identifier,
    }
  }
| _STRING_OFFSET_ '[' primary_expression ']'
  {
    // Drop the "@" sigil; the remainder is the key used in lexer.strings.
    identifier := strings.TrimPrefix($1, "@")
    // A bare "@" carries no name, so there is nothing to look up.
    if identifier != "" {
      lexer := asLexer(yrlex)
      if _, ok := lexer.strings[identifier]; !ok {
        return lexer.setError(
          gyperror.UndefinedStringIdentifierError,
          `undefined string identifier: %s`, $1)
      }
    }
    $$ = &ast.StringOffset{
      Identifier: identifier,
      Index: $3,
    }
  }
| _STRING_OFFSET_
  {
    // Drop the "@" sigil; the remainder is the key used in lexer.strings.
    identifier := strings.TrimPrefix($1, "@")
    // A bare "@" carries no name, so there is nothing to look up.
    if identifier != "" {
      lexer := asLexer(yrlex)
      if _, ok := lexer.strings[identifier]; !ok {
        return lexer.setError(
          gyperror.UndefinedStringIdentifierError,
          `undefined string identifier: %s`, $1)
      }
    }
    $$ = &ast.StringOffset{
      Identifier: identifier,
    }
  }
| _STRING_LENGTH_ '[' primary_expression ']'
  {
    // Drop the "!" sigil; the remainder is the key used in lexer.strings.
    identifier := strings.TrimPrefix($1, "!")
    // A bare "!" carries no name, so there is nothing to look up.
    if identifier != "" {
      lexer := asLexer(yrlex)
      if _, ok := lexer.strings[identifier]; !ok {
        return lexer.setError(
          gyperror.UndefinedStringIdentifierError,
          `undefined string identifier: %s`, $1)
      }
    }
    $$ = &ast.StringLength{
      Identifier: identifier,
      Index: $3,
    }
  }
| _STRING_LENGTH_
{
identifier := strings.TrimPrefix($1, "!")
if identifier != "" {
lexer := asLexer(yrlex)
if _, ok := lexer.strings[identifier]; !ok {
return lexer.setError(
gyperror.UndefinedStringIdentifierError,
`undefined string identifier: %s`, $1)
}
}
$$ = &ast.StringLength{
Identifier: strings.TrimPrefix($1, "!"),
}
Expand Down Expand Up @@ -1272,4 +1401,4 @@ func operation(operator ast.OperatorType, left, right ast.Expression) (n ast.Exp
}
}
return n
}
}
Loading

0 comments on commit 6b198f3

Please sign in to comment.