From 22e83c3be500eef8770bf37282a9ced9e1dec83c Mon Sep 17 00:00:00 2001 From: Gustavo Garcia Pascual Date: Thu, 17 Jan 2019 05:36:54 -0500 Subject: [PATCH] Initial commit --- .gitignore | 7 + Dockerfile | 18 + LICENSES_THIRD_PARTIES | 23 + Makefile | 21 + README.md | 84 ++ cmd/j2y/errors.go | 24 + cmd/j2y/main.go | 56 + cmd/j2y/opts.go | 42 + cmd/y2j/errors.go | 24 + cmd/y2j/main.go | 53 + cmd/y2j/opts.go | 42 + data/data.go | 68 + data/data.proto | 57 + data/serialize.go | 233 ++++ data/string.go | 93 ++ grammar/adapter.go | 62 + grammar/errors.go | 10 + grammar/grammar.y | 748 +++++++++++ grammar/hex_grammar.y | 410 ++++++ grammar/hex_lexer.l | 125 ++ grammar/lexer.go | 2507 ++++++++++++++++++++++++++++++++++++ grammar/lexer.l | 406 ++++++ grammar/parser.go | 1488 +++++++++++++++++++++ grammar/re_grammar.y | 434 +++++++ grammar/re_lexer.l | 270 ++++ tests/duplicate_test.go | 105 ++ tests/for_test.go | 19 + tests/grammar_test.go | 159 +++ tests/modules_test.go | 20 + tests/ruleset.yar | 102 ++ tests/test.go | 31 + tests/unterminated_test.go | 44 + 32 files changed, 7785 insertions(+) create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 LICENSES_THIRD_PARTIES create mode 100644 Makefile create mode 100644 README.md create mode 100644 cmd/j2y/errors.go create mode 100644 cmd/j2y/main.go create mode 100644 cmd/j2y/opts.go create mode 100644 cmd/y2j/errors.go create mode 100644 cmd/y2j/main.go create mode 100644 cmd/y2j/opts.go create mode 100644 data/data.go create mode 100644 data/data.proto create mode 100644 data/serialize.go create mode 100644 data/string.go create mode 100644 grammar/adapter.go create mode 100644 grammar/errors.go create mode 100644 grammar/grammar.y create mode 100644 grammar/hex_grammar.y create mode 100644 grammar/hex_lexer.l create mode 100644 grammar/lexer.go create mode 100644 grammar/lexer.l create mode 100644 grammar/parser.go create mode 100644 grammar/re_grammar.y create mode 100644 grammar/re_lexer.l create mode 100644 tests/duplicate_test.go create mode 100644 tests/for_test.go create mode 100644 tests/grammar_test.go create mode 100644 tests/modules_test.go create mode 100644 tests/ruleset.yar create mode 100644 tests/test.go create mode 100644 tests/unterminated_test.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..634f103 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +y2j +!y2j/ +y2j-linux +y2j-mac +y2j.exe +y.output +yara-parser \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..eb5d4ca --- /dev/null +++ b/Dockerfile @@ -0,0 +1,18 @@ +FROM golang:1.10 + +RUN DEBIAN_FRONTEND=noninteractive \ + apt update && apt install -y \ + automake \ + bison \ + help2man \ + m4 \ + texinfo \ + texlive + +RUN go get golang.org/x/tools/cmd/goyacc +RUN go get github.com/pebbe/flexgo/... 
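+
+# goyacc is the Go port of yacc used to generate grammar/parser.go; flexgo is a
+# flex variant that can emit Go scanners. The steps below configure, build, and
+# install flexgo from the source fetched by `go get` above.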
+ +ENV FLEXGO=/go/src/github.com/pebbe/flexgo + +RUN cd ${FLEXGO} && ./configure && cd - +RUN make -C ${FLEXGO} && make -C ${FLEXGO} install \ No newline at end of file diff --git a/LICENSES_THIRD_PARTIES b/LICENSES_THIRD_PARTIES new file mode 100644 index 0000000..d06f33c --- /dev/null +++ b/LICENSES_THIRD_PARTIES @@ -0,0 +1,23 @@ +- yara-parser (https://github.com/Northern-Lights/yara-parser) + +MIT License + +Copyright (c) 2018 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..78cba58 --- /dev/null +++ b/Makefile @@ -0,0 +1,21 @@ +all: grammar y2j + +builder: + docker build -t go-yara-parser-builder . + +grammar: + docker run --rm -v ${PWD}/grammar:/grammar go-yara-parser-builder bash -c 'flexgo -G -v -o /grammar/lexer.go /grammar/lexer.l && goyacc -p xx -o /grammar/parser.go /grammar/grammar.y' + +j2y: + go build github.com/VirusTotal/go-yara-parser/cmd/j2y + +y2j: + go build github.com/VirusTotal/go-yara-parser/cmd/y2j + +release: grammar + GOOS=linux go build -o y2j-linux github.com/VirusTotal/go-yara-parser/cmd/y2j + GOOS=darwin go build -o y2j-mac github.com/VirusTotal/go-yara-parser/cmd/y2j + GOOS=windows go build -o y2j.exe github.com/VirusTotal/go-yara-parser/cmd/y2j + +clean: + rm grammar/lexer.go grammar/parser.go y.output y2j diff --git a/README.md b/README.md new file mode 100644 index 0000000..b7adc4a --- /dev/null +++ b/README.md @@ -0,0 +1,84 @@ +# go-yara-parser + +`go-yara-parser` is a Go library for manipulating YARA rulesets. Its key feature is that it uses the same grammar and lexer files as the original libyara to ensure that lexing and parsing work exactly like YARA. The grammar and lexer files have been modified to fill Go data structures for ruleset manipulation instead of compiling rulesets for data matching. + +Using `go-yara-parser`, one can read YARA rulesets and programmatically change metadata, rule names, rule modifiers, tags, strings, and more. + +The ability to serialize rulesets to JSON for rule manipulation in other languages is provided with the `y2j` tool. Similarly, `j2y` provides JSON-to-YARA conversion, but do see __Limitations__ below.
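+
+For a flavor of the JSON side, here is a hand-written sketch (not captured tool output) of how a one-rule ruleset serializes; the field names follow the struct tags in `data/data.go`, and the rule contents are illustrative only:
+
+```json
+{
+  "file": "ruleset.yar",
+  "imports": [],
+  "includes": [],
+  "rules": [
+    {
+      "modifiers": { "global": false, "private": false },
+      "identifier": "example_rule",
+      "tags": ["demo"],
+      "meta": [{ "key": "author", "val": "analyst" }],
+      "strings": [
+        {
+          "id": "$s1",
+          "type": 0,
+          "text": "malware",
+          "modifiers": {
+            "nocase": true, "ascii": false, "wide": false,
+            "fullword": false, "xor": false, "i": false, "s": false
+          }
+        }
+      ],
+      "condition": "$s1"
+    }
+  ]
+}
+```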
+ +## `y2j` Usage + +Command line usage for `y2j` looks like the following: + +``` +$ y2j --help +Usage of y2j: y2j [options] file.yar + +options: + -indent int + Set number of indent spaces (default 2) + -o string + JSON output file +``` + +Note that the string types are as follows: + +| String type `int` code | Designation | +| - | - | +| 0 | string | +| 1 | hex pair bytes | +| 2 | regex | + +## Go Usage + +Sample usage for working with rulesets in Go looks like the following: + +```go +package main + +import ( + "fmt" + "log" + "os" + + "github.com/VirusTotal/go-yara-parser/grammar" +) + +func main() { + input, err := os.Open(os.Args[1]) // Single argument: path to your file + if err != nil { + log.Fatalf("Error: %s\n", err) + } + + ruleset, err := grammar.Parse(input, os.Stdout) + if err != nil { + log.Fatalf(`Parsing failed: "%s"`, err) + } + + fmt.Printf("Ruleset:\n%v\n", ruleset) + + // Manipulate the first rule + rule := ruleset.Rules[0] + rule.Identifier = "new_rule_name" + rule.Modifiers.Global = true + rule.Modifiers.Private = false +} +``` + +## Development + +The included Dockerfile will build an image suitable for producing the parser and lexer using goyacc and flexgo. There is a `builder` target in the `Makefile` to help you quickly get started with this. Run the following to build the builder image: + +`make builder` + +This will provide you with a Docker image called `go-yara-parser-builder`. + +As you make changes to the grammar, you can then run `make grammar`. The .go files will be output in the `grammar/` directory. + +## Limitations + +Currently, there are no guarantees with the library that modified rules will serialize back into a valid YARA ruleset. For example, you can set `rule.Identifier = "123"`, but this would be invalid YARA. Additionally, adding or removing strings may cause a condition to become invalid, and conditions are currently treated only as text. Comments also cannot be retained. + +## License and third party code + +This project uses code from [`yara-parser`](https://github.com/Northern-Lights/yara-parser) by [Northern-Lights](https://github.com/Northern-Lights), which is available under the MIT license (see `LICENSES_THIRD_PARTIES`). 
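+
+## Serializing rulesets from Go
+
+The `data` types can also be constructed programmatically and turned back into YARA text with their `Serialize` methods; this is what `j2y` does after decoding its JSON input. A minimal sketch (the rule content is illustrative only):
+
+```go
+package main
+
+import (
+    "fmt"
+    "log"
+
+    "github.com/VirusTotal/go-yara-parser/data"
+)
+
+func main() {
+    // Build a one-rule ruleset by hand instead of parsing a .yar file.
+    ruleset := data.RuleSet{
+        Rules: []data.Rule{{
+            Identifier: "example_rule",
+            Strings: data.Strings{{
+                ID:        "$s1",
+                Type:      data.TypeString,
+                Text:      "malware",
+                Modifiers: data.StringModifiers{Nocase: true},
+            }},
+            Condition: "$s1",
+        }},
+    }
+
+    // Serialize emits the ruleset as YARA source text.
+    txt, err := ruleset.Serialize()
+    if err != nil {
+        log.Fatalf("Serializing failed: %s", err)
+    }
+    fmt.Print(txt)
+}
+```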
diff --git a/cmd/j2y/errors.go b/cmd/j2y/errors.go new file mode 100644 index 0000000..be4c0a4 --- /dev/null +++ b/cmd/j2y/errors.go @@ -0,0 +1,24 @@ +package main + +import ( + "fmt" + "os" + "strings" +) + +// perror writes a format string and args to stderr +func perror(s string, a ...interface{}) { + var sb strings.Builder + sb.WriteString(fmt.Sprintf(s, a...)) + sb.WriteRune('\n') + os.Stderr.WriteString(sb.String()) +} + +// handleErr should be deferred to report any errors in deferred functions +func handleErr(f func() error) { + err := f() + if err != nil { + perror(`Error: %s`, err) + os.Exit(127) + } +} diff --git a/cmd/j2y/main.go b/cmd/j2y/main.go new file mode 100644 index 0000000..83eb64d --- /dev/null +++ b/cmd/j2y/main.go @@ -0,0 +1,56 @@ +package main + +import ( + "encoding/json" + "io" + "os" + + "github.com/VirusTotal/go-yara-parser/data" +) + +// global options +var opts options + +func main() { + opts = getopt() + + jsonFile, err := os.Open(opts.Infile) + if err != nil { + perror(`Couldn't open JSON file "%s": %s`, opts.Infile, err) + os.Exit(2) + } + defer handleErr(jsonFile.Close) + + var ruleset data.RuleSet + err = json.NewDecoder(jsonFile).Decode(&ruleset) + if err != nil { + perror(`Couldn't JSON decode file: %s`, err) + os.Exit(3) + } + + // Set output to stdout if not specified; otherwise file + var out io.Writer + if opts.Outfile == "" { + out = os.Stdout + } else { + f, err := os.Create(opts.Outfile) + if err != nil { + perror(`Couldn't create output file "%s"`, opts.Outfile) + os.Exit(5) + } + defer handleErr(f.Close) + out = f + } + + txt, err := ruleset.Serialize() + if err != nil { + perror(`Couldn't serialize ruleset: %s`, err) + os.Exit(6) + } + + _, err = out.Write([]byte(txt)) + if err != nil { + perror(`Error writing YARA: %s`, err) + os.Exit(6) + } +} diff --git a/cmd/j2y/opts.go b/cmd/j2y/opts.go new file mode 100644 index 0000000..0e646db --- /dev/null +++ b/cmd/j2y/opts.go @@ -0,0 +1,42 @@ +package main + +import ( + "flag" + "os" + "strings" +) + +type options struct { + Indent string + Infile string + Outfile string +} + +func getopt() options { + var ( + o options + indent int + ) + + flag.IntVar(&indent, "indent", 2, "Set number of indent spaces") + flag.StringVar(&o.Outfile, "o", "", "YARA output file") + + flag.Parse() + + // Set indent + var sb strings.Builder + for i := 0; i < indent; i++ { + sb.WriteRune(' ') + } + o.Indent = sb.String() + + // The JSON file is the only positional argument + if n := flag.NArg(); n != 1 { + perror("Expected 1 input file; found %d", n) + os.Exit(1) + } + + o.Infile = flag.Args()[0] + + return o +} diff --git a/cmd/y2j/errors.go b/cmd/y2j/errors.go new file mode 100644 index 0000000..be4c0a4 --- /dev/null +++ b/cmd/y2j/errors.go @@ -0,0 +1,24 @@ +package main + +import ( + "fmt" + "os" + "strings" +) + +// perror writes a format string and args to stderr +func perror(s string, a ...interface{}) { + var sb strings.Builder + sb.WriteString(fmt.Sprintf(s, a...)) + sb.WriteRune('\n') + os.Stderr.WriteString(sb.String()) +} + +// handleErr should be deferred to report any errors in deferred functions +func handleErr(f func() error) { + err := f() + if err != nil { + perror(`Error: %s`, err) + os.Exit(127) + } +} diff --git a/cmd/y2j/main.go b/cmd/y2j/main.go new file mode 100644 index 0000000..f4b44ec --- /dev/null +++ b/cmd/y2j/main.go @@ -0,0 +1,53 @@ +package main + +import ( + "encoding/json" + "io" + "os" + + "github.com/VirusTotal/go-yara-parser/grammar" +) + +// global options +var opts options + +func 
main() { + opts = getopt() + + yaraFile, err := os.Open(opts.Infile) + if err != nil { + perror(`Couldn't open YARA file "%s": %s`, opts.Infile, err) + os.Exit(2) + } + defer handleErr(yaraFile.Close) + + ruleset, err := grammar.Parse(yaraFile, os.Stdout) + if err != nil { + perror(`Couldn't parse YARA ruleset: %s`, err) + os.Exit(3) + } + ruleset.File = opts.Infile + + // Set output to stdout if not specified; otherwise file + var out io.Writer + if opts.Outfile == "" { + out = os.Stdout + } else { + f, err := os.Create(opts.Outfile) + if err != nil { + perror(`Couldn't create output file "%s"`, opts.Outfile) + os.Exit(5) + } + defer handleErr(f.Close) + out = f + } + + enc := json.NewEncoder(out) + enc.SetEscapeHTML(false) + enc.SetIndent("", opts.Indent) + err = enc.Encode(&ruleset) + if err != nil { + perror(`Error writing JSON: %s`, err) + os.Exit(6) + } +} diff --git a/cmd/y2j/opts.go b/cmd/y2j/opts.go new file mode 100644 index 0000000..eeec845 --- /dev/null +++ b/cmd/y2j/opts.go @@ -0,0 +1,42 @@ +package main + +import ( + "flag" + "os" + "strings" +) + +type options struct { + Indent string + Infile string + Outfile string +} + +func getopt() options { + var ( + o options + indent int + ) + + flag.IntVar(&indent, "indent", 2, "Set number of indent spaces") + flag.StringVar(&o.Outfile, "o", "", "JSON output file") + + flag.Parse() + + // Set indent + var sb strings.Builder + for i := 0; i < indent; i++ { + sb.WriteRune(' ') + } + o.Indent = sb.String() + + // The yara file is the only positional argument + if n := flag.NArg(); n != 1 { + perror("Expected 1 input file; found %d", n) + os.Exit(1) + } + + o.Infile = flag.Args()[0] + + return o +} diff --git a/data/data.go b/data/data.go new file mode 100644 index 0000000..3b2d33b --- /dev/null +++ b/data/data.go @@ -0,0 +1,68 @@ +package data + +// RuleSet represents the contents of a yara file +type RuleSet struct { + File string `json:"file"` // Name of the yara file + Imports []string `json:"imports"` + Includes []string `json:"includes"` + Rules []Rule `json:"rules"` +} + +// A Rule is a single yara rule +type Rule struct { + Modifiers RuleModifiers `json:"modifiers"` + Identifier string `json:"identifier"` + Tags []string `json:"tags"` + Meta Metas `json:"meta"` + Strings Strings `json:"strings"` + Condition string `json:"condition"` +} + +// RuleModifiers denote whether a Rule is global, private, neither, or both. +type RuleModifiers struct { + Global bool `json:"global"` + Private bool `json:"private"` +} + +// Metas are slices of Meta. A single Meta may be duplicated within Metas. +type Metas []Meta + +// A Meta is a simple key/value pair. Val should be restricted to +// int, string, and bool. +type Meta struct { + Key string `json:"key"` + Val interface{} `json:"val"` +} + +// Strings are slices of String. No two String structs may have the same +// identifier within a Strings, except for the $ anonymous identifier. 
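+// The parser enforces this uniqueness rule: grammar/grammar.y panics when a
+// rule declares a duplicate string identifier other than $ (see
+// tests/duplicate_test.go).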
+type Strings []String + +// String is a string, regex, or byte pair sequence +type String struct { + ID string `json:"id"` + Type StringType `json:"type"` + Text string `json:"text"` + Modifiers StringModifiers `json:"modifiers"` +} + +// StringType is used to differentiate between string, hex bytes, and regex +type StringType int + +// Type of String +const ( + TypeString StringType = iota + TypeHexString + TypeRegex +) + +// StringModifiers denote the status of the possible modifiers for strings +type StringModifiers struct { + Nocase bool `json:"nocase"` + ASCII bool `json:"ascii"` + Wide bool `json:"wide"` + Fullword bool `json:"fullword"` + Xor bool `json:"xor"` + I bool `json:"i"` // for regex + S bool `json:"s"` // for regex +} diff --git a/data/data.proto b/data/data.proto new file mode 100644 index 0000000..4a31526 --- /dev/null +++ b/data/data.proto @@ -0,0 +1,57 @@ +// This file is currently unused but serves as a model +// for the ruleset data structure. The issue with the +// Meta message must be solved before using this. + +syntax = "proto3"; + +message Modifiers { + bool global = 1; + bool private = 2; +} + +// value here is problematic, since in YARA, they can be string, int, or bool +message Meta { + string key = 1; + oneof value { + string s = 2; + int64 i = 3; + bool b = 4; + } +} + +message StringModifiers { + bool nocase = 1; + bool ascii = 2; + bool wide = 3; + bool fullword = 4; + bool i = 5; + bool s = 6; +} + +message String { + string id = 1; + enum Type { + TypeString = 0; + TypeHexString = 1; + TypeRegex = 2; + } + Type type = 2; + string text = 3; + StringModifiers modifiers = 4; +} + +message Rule { + Modifiers modifiers = 1; + string identifier = 2; + repeated string tags = 3; + repeated Meta meta = 4; + repeated String strings = 5; + string condition = 6; +} + +message Ruleset { + string file = 1; + repeated string imports = 2; + repeated string includes = 3; + repeated Rule rules = 4; +} \ No newline at end of file diff --git a/data/serialize.go b/data/serialize.go new file mode 100644 index 0000000..c097929 --- /dev/null +++ b/data/serialize.go @@ -0,0 +1,233 @@ +// Functions and methods for reserializing the JSON into YARA rules. +// TODO: Handle indents better... Maybe have a global var denoting +// how many spaces to indent. +// TODO: Handle indents and formatting in general for conditions. +// Once conditions are treated as first-class vs. text, we can do that. 
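+// Note: encoding/json decodes JSON numbers into float64 when the target is an
+// interface{}, which is why Meta.Serialize special-cases float64 values whose
+// fractional part is zero and writes them as integers.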
+ +package data + +import ( + "fmt" + "strings" +) + +// Serialize for RuleSet builds a complete YARA ruleset +func (rs *RuleSet) Serialize() (out string, err error) { + var b strings.Builder + + if len(rs.Includes) > 0 { + for _, include := range rs.Includes { + b.WriteString(fmt.Sprintf("include \"%s\"\n", include)) + } + b.WriteRune('\n') + } + if len(rs.Imports) > 0 { + for _, imp := range rs.Imports { + b.WriteString(fmt.Sprintf("import \"%s\"\n", imp)) + } + b.WriteRune('\n') + } + + for _, rule := range rs.Rules { + str, err := rule.Serialize() + if err != nil { + return "", err + } + b.WriteString(str) + } + + out = b.String() + + return +} + +// Serialize for Rule builds a YARA rule as a string +func (r *Rule) Serialize() (out string, err error) { + var b strings.Builder + + // Rule modifiers + if r.Modifiers.Global { + b.WriteString("global ") + } + if r.Modifiers.Private { + b.WriteString("private ") + } + + // Rule name + b.WriteString(fmt.Sprintf("rule %s ", r.Identifier)) + + // Any applicable tags + if len(r.Tags) > 0 { + b.WriteString(": ") + for _, t := range r.Tags { + b.WriteString(t) + b.WriteRune(' ') + } + } + + // Start metas, strings, etc. + b.WriteString("{\n") + + metas, err := r.Meta.Serialize() + if err != nil { + return + } + b.WriteString(metas) + + strs, err := r.Strings.Serialize() + if err != nil { + return + } + b.WriteString(strs) + + b.WriteString("condition:\n") + b.WriteString(" ") // TODO: Don't assume indent... + b.WriteString(r.Condition) + b.WriteString("\n}\n\n") + + out = b.String() + + return +} + +// Serialize for Metas returns the "meta:" section in the YARA rule +func (ms *Metas) Serialize() (out string, err error) { + if ms == nil || len(*ms) == 0 { + return + } + + var b strings.Builder + b.WriteString("meta:\n") + + for _, m := range *ms { + meta, e := m.Serialize() + if e != nil { + err = e + return + } + b.WriteString(" ") // TODO: make indent customizable + b.WriteString(meta) + b.WriteRune('\n') + } + + out = b.String() + return +} + +// Serialize for Meta returns the string representation of the key/value pair +func (m *Meta) Serialize() (out string, err error) { + switch val := m.Val.(type) { + case string: + out = fmt.Sprintf(`%s = "%s"`, m.Key, val) + + case int64, bool: + out = fmt.Sprintf(`%s = %v`, m.Key, val) + + case float64: + // This is a bit tricky... val is interface{} and JSON unmarshals it + // as float64... So ensure decimal part is zero and treat as int64. 
+ n := int64(val) + check := val - float64(n) // This should be 0.0 if it was int64 + if check != 0.0 { + err = fmt.Errorf(`Unsupported meta value type "%T"`, val) + return + } + out = fmt.Sprintf(`%s = %v`, m.Key, val) + + default: + err = fmt.Errorf(`Unsupported meta value type "%T"`, val) + } + + return +} + +// Serialize for Strings returns the "strings:" section in the YARA rule +func (ss *Strings) Serialize() (out string, err error) { + if ss == nil || len(*ss) == 0 { + return + } + + var b strings.Builder + b.WriteString("strings:\n") + + for _, s := range *ss { + str, e := s.Serialize() + if e != nil { + err = e + return + } + b.WriteString(" ") // TODO: Make indent customizable + b.WriteString(str) + b.WriteRune('\n') + } + + out = b.String() + return +} + +// Serialize for String returns a String as a string +func (s *String) Serialize() (out string, err error) { + // Format string for: + // `<id> = <encapsOpen><text><encapsClose> <modifiers>` + format := "%s = %s%s%s %s" + + var ( + encapsOpen string + encapsClose string + ) + switch t := s.Type; t { + case TypeString: + encapsOpen, encapsClose = `"`, `"` + + case TypeHexString: + encapsOpen, encapsClose = "{", "}" + + case TypeRegex: + encapsOpen = "/" + var closeBuilder strings.Builder + closeBuilder.WriteRune('/') + if s.Modifiers.I { + closeBuilder.WriteRune('i') + } + if s.Modifiers.S { + closeBuilder.WriteRune('s') + } + encapsClose = closeBuilder.String() + + default: + err = fmt.Errorf("No such string type %s (%d)", t, t) + return + } + + mods, _ := s.Modifiers.Serialize() + + out = fmt.Sprintf(format, s.ID, encapsOpen, s.Text, encapsClose, mods) + + return +} + +// Serialize for StringModifiers creates a space-separated list of +// string modifiers, excluding i and s, which are appended to /regex/. +// The returned error is always nil.
+func (m *StringModifiers) Serialize() (out string, _ error) { + const modsAvailable = 4 + modifiers := make([]string, 0, modsAvailable) + if m.ASCII { + modifiers = append(modifiers, "ascii") + } + if m.Wide { + modifiers = append(modifiers, "wide") + } + if m.Nocase { + modifiers = append(modifiers, "nocase") + } + if m.Fullword { + modifiers = append(modifiers, "fullword") + } + if m.Xor { + modifiers = append(modifiers, "xor") + } + + out = strings.Join(modifiers, " ") + return +} diff --git a/data/string.go b/data/string.go new file mode 100644 index 0000000..0342d3a --- /dev/null +++ b/data/string.go @@ -0,0 +1,93 @@ +// Stringer implementations for each type + +package data + +import ( + "fmt" +) + +// String for RuleSet returns the name of the file +func (rs RuleSet) String() string { + return rs.File +} + +// String for Rule returns the rule ID +func (r Rule) String() string { + return r.Identifier +} + +// String for Metas returns a string representation of the keys/values +func (ms Metas) String() string { + mets := make([]string, len(ms)) + for i, m := range ms { + mets[i] = m.String() + } + return fmt.Sprintf("%v", mets) +} + +// String for Meta returns a string representation of the key/value +func (m Meta) String() string { + switch v := m.Val.(type) { + case string: + return fmt.Sprintf(`%s/"%v"`, m.Key, v) + case int64, bool: + return fmt.Sprintf(`%s/%v`, m.Key, v) + } + return fmt.Sprintf("%s/", m.Key) +} + +// String for Strings returns a string representation of the String IDs +func (ss Strings) String() string { + strs := make([]string, len(ss)) + for i, s := range ss { + strs[i] = s.ID + } + return fmt.Sprintf("%v", strs) +} + +// String returns the identifier of the String +func (s String) String() string { + return s.ID +} + +func (t StringType) String() string { + switch t { + case TypeString: + return "string" + + case TypeHexString: + return "hex" + + case TypeRegex: + return "regex" + } + + return "unknown" +} + +// String for StringModifiers returns a string representation of the modifiers +func (m StringModifiers) String() string { + mods := make([]string, 0, 6) + if m.Nocase { + mods = append(mods, "nocase") + } + if m.ASCII { + mods = append(mods, "ascii") + } + if m.Wide { + mods = append(mods, "wide") + } + if m.Fullword { + mods = append(mods, "fullword") + } + if m.Xor { + mods = append(mods, "xor") + } + if m.I { + mods = append(mods, "insensitive") + } + if m.S { + mods = append(mods, "dotall") + } + return fmt.Sprintf("%v", mods) +} diff --git a/grammar/adapter.go b/grammar/adapter.go new file mode 100644 index 0000000..1844d80 --- /dev/null +++ b/grammar/adapter.go @@ -0,0 +1,62 @@ +// adapter.go provides an adapter for a flexgo lexer to work +// with a goyacc parser + +package grammar + +import ( + "fmt" + "io" + + "github.com/VirusTotal/go-yara-parser/data" +) + +var errParser error + +func init() { + xxErrorVerbose = true +} + +// Parse takes an input source and an output and initiates parsing +func Parse(input io.Reader, output io.Writer) (rs data.RuleSet, err error) { + defer recoverParse(&err) + + // "Reset" the global ParsedRuleset + ParsedRuleset = data.RuleSet{} + + lexer := Lexer{ + lexer: *NewScanner(), + } + lexer.lexer.In = input + lexer.lexer.Out = output + + result := xxParse(&lexer) + if result != 0 { + + err = fmt.Errorf(`Parser result: "%d" %s`, result, errParser) + } + + rs = ParsedRuleset + + return +} + +// Lexer is an adapter that fits the flexgo lexer ("Scanner") into goyacc +type Lexer struct { + lexer Scanner +} + +// Lex 
provides the interface expected by the goyacc parser. +// It sets the global yylval pointer (defined in the lexer file) +// to the one passed as an argument so that the parser actions +// can make use of it. +func (l *Lexer) Lex(lval *xxSymType) int { + yylval = lval + return l.lexer.Lex().(int) +} + +// Error satisfies the interface expected of the goyacc parser. +// Here, it records the error in errParser so that Parse can return it. +func (l *Lexer) Error(e string) { + errParser = fmt.Errorf(`grammar: lexical error @%d: "%s"`, + l.lexer.Lineno, e) +} diff --git a/grammar/errors.go b/grammar/errors.go new file mode 100644 index 0000000..d4dc938 --- /dev/null +++ b/grammar/errors.go @@ -0,0 +1,10 @@ +package grammar + +import "fmt" + +func recoverParse(err *error) { + if r := recover(); r != nil { + e := fmt.Errorf("%s", r) + *err = e + } +} diff --git a/grammar/grammar.y b/grammar/grammar.y new file mode 100644 index 0000000..b33dfbc --- /dev/null +++ b/grammar/grammar.y @@ -0,0 +1,748 @@ +/* +Copyright (c) 2007-2013. The YARA Authors. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +%{ +package grammar + +import ( + "fmt" + "strings" + + "github.com/VirusTotal/go-yara-parser/data" +) + +var ParsedRuleset data.RuleSet + +type regexPair struct { + text string + mods data.StringModifiers +} + +%} + +// yara-parser: we have 'const eof = 0' in lexer.l +// Token that marks the end of the original file. +// %token _END_OF_FILE_ 0 + +// TODO: yara-parser: https://github.com/VirusTotal/yara/blob/v3.8.1/libyara/lexer.l#L285 +// Token that marks the end of included files. We can't use _END_OF_FILE_ +// because bison stops parsing when it sees _END_OF_FILE_; we want to be +// able to identify the point where an included file ends, but continue +// parsing any content that follows.
+%token _END_OF_INCLUDED_FILE_ + +%token _DOT_DOT_ +%token _RULE_ +%token _PRIVATE_ +%token _GLOBAL_ +%token _META_ +%token _STRINGS_ +%token _CONDITION_ +%token <s> _IDENTIFIER_ +%token <s> _STRING_IDENTIFIER_ +%token _STRING_COUNT_ +%token _STRING_OFFSET_ +%token _STRING_LENGTH_ +%token _STRING_IDENTIFIER_WITH_WILDCARD_ +%token <i64> _NUMBER_ +%token _DOUBLE_ +%token _INTEGER_FUNCTION_ +%token <s> _TEXT_STRING_ +%token <s> _HEX_STRING_ +%token <reg> _REGEXP_ +%token _ASCII_ +%token _WIDE_ +%token _XOR_ +%token _NOCASE_ +%token _FULLWORD_ +%token _AT_ +%token _FILESIZE_ +%token _ENTRYPOINT_ +%token _ALL_ +%token _ANY_ +%token _IN_ +%token _OF_ +%token _FOR_ +%token _THEM_ +%token _MATCHES_ +%token _CONTAINS_ +%token _IMPORT_ + +%token _TRUE_ +%token _FALSE_ + +%token _LBRACE_ _RBRACE_ +%token _INCLUDE_ + +%left _OR_ +%left _AND_ +%left '|' +%left '^' +%left '&' +%left _EQ_ _NEQ_ +%left _LT_ _LE_ _GT_ _GE_ +%left _SHIFT_LEFT_ _SHIFT_RIGHT_ +%left '+' '-' +%left '*' '\\' '%' +%right _NOT_ '~' UNARY_MINUS + +%type <s> import +%type <yr> rule +%type <ss> tags +%type <ss> tag_list +%type <m> meta +%type <mps> meta_declarations +%type <mp> meta_declaration +%type <yss> strings +%type <yss> string_declarations +%type <ys> string_declaration +%type <mod> string_modifier +%type <mod> string_modifiers +%type <rm> rule_modifier +%type <rm> rule_modifiers + +%union { + i64 int64 + s string + ss []string + + rm data.RuleModifiers + m data.Metas + mp data.Meta + mps data.Metas + mod data.StringModifiers + reg regexPair + ys data.String + yss data.Strings + yr data.Rule +} + + +%% + +rules + : /* empty */ + | rules rule { + ParsedRuleset.Rules = append(ParsedRuleset.Rules, $2) + } + | rules import { + ParsedRuleset.Imports = append(ParsedRuleset.Imports, $2) + } + | rules _INCLUDE_ _TEXT_STRING_ { + ParsedRuleset.Includes = append(ParsedRuleset.Includes, $3) + } + | rules _END_OF_INCLUDED_FILE_ { } + ; + + +import + : _IMPORT_ _TEXT_STRING_ + { + $$ = $2 + } + ; + + +rule + : rule_modifiers _RULE_ _IDENTIFIER_ + { + $<yr>$.Modifiers = $1 + $<yr>$.Identifier = $3 + + // Forbid duplicate rules + for _, r := range ParsedRuleset.Rules { + if $3 == r.Identifier { + err := fmt.Errorf(`Duplicate rule "%s"`, $3) + panic(err) + } + } + } + tags _LBRACE_ meta strings + { + // $<yr>4 is the rule created in above action + $<yr>4.Tags = $5 + + // Forbid duplicate tags + idx := make(map[string]struct{}) + for _, t := range $5 { + if _, had := idx[t]; had { + msg := fmt.Sprintf(`grammar: Rule "%s" has duplicate tag "%s"`, + $<yr>4.Identifier, + t) + panic(msg) + } + idx[t] = struct{}{} + } + + $<yr>4.Meta = $7 + + $<yr>4.Strings = $8 + + // Forbid duplicate string IDs, except `$` (anonymous) + idx = make(map[string]struct{}) + for _, s := range $8 { + if s.ID == "$" { + continue + } + if _, had := idx[s.ID]; had { + msg := fmt.Sprintf( + `grammar: Rule "%s" has duplicated string "%s"`, + $<yr>4.Identifier, + s.ID) + panic(msg) + } + idx[s.ID] = struct{}{} + } + } + condition _RBRACE_ + { + c := conditionBuilder.String() + c = strings.TrimLeft(c, ":\n\r\t ") + c = strings.TrimRight(c, "}\n\r\t ") + $<yr>4.Condition = c + $$ = $<yr>4 + } + ; + + +meta + : /* empty */ + { + + } + | _META_ ':' meta_declarations + { + $$ = make(data.Metas, 0, len($3)) + for _, mpair := range $3 { + // YARA is ok with duplicate keys; we follow suit + $$ = append($$, mpair) + } + } + ; + + +strings + : /* empty */ + { + $$ = data.Strings{} + } + | _STRINGS_ ':' string_declarations + { + $$ = $3 + } + ; + + +condition + : _CONDITION_ ':' boolean_expression + ; + + +rule_modifiers + : /* empty */ { $$ = data.RuleModifiers{} } + | rule_modifiers rule_modifier { + $$.Private =
$$.Private || $2.Private + $$.Global = $$.Global || $2.Global + } + ; + + +rule_modifier + : _PRIVATE_ { $$.Private = true } + | _GLOBAL_ { $$.Global = true } + ; + + +tags + : /* empty */ + { + $$ = []string{} + } + | ':' tag_list + { + $$ = $2 + } + ; + + +tag_list + : _IDENTIFIER_ + { + $$ = []string{$1} + } + | tag_list _IDENTIFIER_ + { + $$ = append($1, $2) + } + ; + + + +meta_declarations + : meta_declaration { $$ = data.Metas{$1} } + | meta_declarations meta_declaration { $$ = append($$, $2)} + ; + + +meta_declaration + : _IDENTIFIER_ '=' _TEXT_STRING_ + { + $$ = data.Meta{$1, $3} + } + | _IDENTIFIER_ '=' _NUMBER_ + { + $$ = data.Meta{$1, $3} + } + | _IDENTIFIER_ '=' '-' _NUMBER_ + { + $$ = data.Meta{$1, -$4} + } + | _IDENTIFIER_ '=' _TRUE_ + { + $$ = data.Meta{$1, true} + } + | _IDENTIFIER_ '=' _FALSE_ + { + $$ = data.Meta{$1, false} + } + ; + + +string_declarations + : string_declaration { $$ = data.Strings{$1} } + | string_declarations string_declaration { $$ = append($1, $2) } + ; + + +string_declaration + : _STRING_IDENTIFIER_ '=' + { + $<ys>$.Type = data.TypeString + $<ys>$.ID = $1 + } + _TEXT_STRING_ string_modifiers + { + $<ys>3.Text = $4 + $<ys>3.Modifiers = $5 + + $$ = $<ys>3 + } + | _STRING_IDENTIFIER_ '=' + { + $<ys>$.Type = data.TypeRegex + $<ys>$.ID = $1 + } + _REGEXP_ string_modifiers + { + $<ys>3.Text = $4.text + + $5.I = $4.mods.I + $5.S = $4.mods.S + + $<ys>3.Modifiers = $5 + + $$ = $<ys>3 + } + | _STRING_IDENTIFIER_ '=' _HEX_STRING_ + { + $$.Type = data.TypeHexString + $$.ID = $1 + $$.Text = $3 + } + ; + + +string_modifiers + : /* empty */ { + $$ = data.StringModifiers{} + } + | string_modifiers string_modifier { + $$ = data.StringModifiers { + Wide: $1.Wide || $2.Wide, + ASCII: $1.ASCII || $2.ASCII, + Nocase: $1.Nocase || $2.Nocase, + Fullword: $1.Fullword || $2.Fullword, + Xor: $1.Xor || $2.Xor, + } + } + ; + + +string_modifier + : _WIDE_ { $$.Wide = true } + | _ASCII_ { $$.ASCII = true } + | _NOCASE_ { $$.Nocase = true } + | _FULLWORD_ { $$.Fullword = true } + | _XOR_ { $$.Xor = true } + ; + + +identifier + : _IDENTIFIER_ + { + + } + | identifier '.'
_IDENTIFIER_ + { + + } + | identifier '[' primary_expression ']' + { + + } + + | identifier '(' arguments ')' + { + + } + ; + + +arguments + : /* empty */ { } + | arguments_list { } + + +arguments_list + : expression + { + + } + | arguments_list ',' expression + { + + } + ; + + +regexp + : _REGEXP_ + { + + } + ; + + +boolean_expression + : expression + { + + } + ; + +expression + : _TRUE_ + { + + } + | _FALSE_ + { + + } + | primary_expression _MATCHES_ regexp + { + + } + | primary_expression _CONTAINS_ primary_expression + { + + } + | _STRING_IDENTIFIER_ + { + + } + | _STRING_IDENTIFIER_ _AT_ primary_expression + { + + } + | _STRING_IDENTIFIER_ _IN_ range + { + + } + | _FOR_ for_expression error + { + + } + | _FOR_ for_expression _IDENTIFIER_ _IN_ + { + + } + integer_set ':' + { + + } + '(' boolean_expression ')' + { + + } + | _FOR_ for_expression _OF_ string_set ':' + { + + } + '(' boolean_expression ')' + { + + } + | for_expression _OF_ string_set + { + + } + | _NOT_ boolean_expression + { + + } + | boolean_expression _AND_ + { + + } + boolean_expression + { + + } + | boolean_expression _OR_ + { + + } + boolean_expression + { + + } + | primary_expression _LT_ primary_expression + { + + } + | primary_expression _GT_ primary_expression + { + + } + | primary_expression _LE_ primary_expression + { + + } + | primary_expression _GE_ primary_expression + { + + } + | primary_expression _EQ_ primary_expression + { + + } + | primary_expression _NEQ_ primary_expression + { + + } + | primary_expression + { + + } + |'(' expression ')' + { + + } + ; + + +integer_set + : '(' integer_enumeration ')' { } + | range { } + ; + + +range + : '(' primary_expression _DOT_DOT_ primary_expression ')' + { + + } + ; + + +integer_enumeration + : primary_expression + { + + } + | integer_enumeration ',' primary_expression + { + + } + ; + + +string_set + : '(' + { + + } + string_enumeration ')' + | _THEM_ + { + + } + ; + + +string_enumeration + : string_enumeration_item + | string_enumeration ',' string_enumeration_item + ; + + +string_enumeration_item + : _STRING_IDENTIFIER_ + { + + } + | _STRING_IDENTIFIER_WITH_WILDCARD_ + { + + } + ; + + +for_expression + : primary_expression + | _ALL_ + { + + } + | _ANY_ + { + + } + ; + + +primary_expression + : '(' primary_expression ')' + { + + } + | _FILESIZE_ + { + + } + | _ENTRYPOINT_ + { + + } + | _INTEGER_FUNCTION_ '(' primary_expression ')' + { + + } + | _NUMBER_ + { + + } + | _DOUBLE_ + { + + } + | _TEXT_STRING_ + { + + } + | _STRING_COUNT_ + { + + } + | _STRING_OFFSET_ '[' primary_expression ']' + { + + } + | _STRING_OFFSET_ + { + + } + | _STRING_LENGTH_ '[' primary_expression ']' + { + + } + | _STRING_LENGTH_ + { + + } + | identifier + { + + } + | '-' primary_expression %prec UNARY_MINUS + { + + } + | primary_expression '+' primary_expression + { + + } + | primary_expression '-' primary_expression + { + + } + | primary_expression '*' primary_expression + { + + } + | primary_expression '\\' primary_expression + { + + } + | primary_expression '%' primary_expression + { + + } + | primary_expression '^' primary_expression + { + + } + | primary_expression '&' primary_expression + { + + } + | primary_expression '|' primary_expression + { + + } + | '~' primary_expression + { + + } + | primary_expression _SHIFT_LEFT_ primary_expression + { + + } + | primary_expression _SHIFT_RIGHT_ primary_expression + { + + } + | regexp + { + + } + ; + +%% diff --git a/grammar/hex_grammar.y b/grammar/hex_grammar.y new file mode 100644 index 0000000..6fa0cf0 --- /dev/null +++ 
b/grammar/hex_grammar.y @@ -0,0 +1,410 @@ +/* +Copyright (c) 2013. The YARA Authors. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +%{ + +#include +#include + +#include +#include +#include +#include +#include +#include + + +#define STR_EXPAND(tok) #tok +#define STR(tok) STR_EXPAND(tok) + +#define YYERROR_VERBOSE + +#define YYMALLOC yr_malloc +#define YYFREE yr_free + +#define mark_as_not_fast_regexp() \ + ((RE_AST*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_FAST_REGEXP + +#define fail_if_too_many_ast_levels(cleanup_code) \ + if (((RE_AST*) yyget_extra(yyscanner))->levels++ > RE_MAX_AST_LEVELS) \ + { \ + { cleanup_code } \ + yyerror(yyscanner, lex_env, "string too long"); \ + YYABORT; \ + } + +#define fail_if(x, error) \ + if (x) \ + { \ + lex_env->last_error_code = error; \ + YYABORT; \ + } \ + +#define destroy_node_if(x, node) \ + if (x) \ + { \ + yr_re_node_destroy(node); \ + } \ + +%} + +%name-prefix "hex_yy" +%pure-parser + +%parse-param {void *yyscanner} +%parse-param {HEX_LEX_ENVIRONMENT *lex_env} + +%lex-param {yyscan_t yyscanner} +%lex-param {HEX_LEX_ENVIRONMENT *lex_env} + +%union { + int64_t integer; + RE_NODE *re_node; +} + +%token _BYTE_ +%token _MASKED_BYTE_ +%token _NUMBER_ + +%type tokens +%type token_sequence +%type token_or_range +%type token byte +%type alternatives +%type range + +%destructor { yr_re_node_destroy($$); $$ = NULL; } tokens +%destructor { yr_re_node_destroy($$); $$ = NULL; } token_sequence +%destructor { yr_re_node_destroy($$); $$ = NULL; } token_or_range +%destructor { yr_re_node_destroy($$); $$ = NULL; } token +%destructor { yr_re_node_destroy($$); $$ = NULL; } byte +%destructor { yr_re_node_destroy($$); $$ = NULL; } alternatives +%destructor { yr_re_node_destroy($$); $$ = NULL; } range + +%% + +hex_string + : '{' tokens '}' + { + RE_AST* re_ast = yyget_extra(yyscanner); + re_ast->root_node = $2; + } + ; + + +tokens + : token + { + $$ = $1; + } + | token token + { + fail_if_too_many_ast_levels({ + yr_re_node_destroy($1); + yr_re_node_destroy($2); + }); + + $$ = yr_re_node_create(RE_NODE_CONCAT, $1, $2); + + destroy_node_if($$ == NULL, $1); + 
destroy_node_if($$ == NULL, $2); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + | token token_sequence token + { + RE_NODE* new_concat; + RE_NODE* leftmost_concat = NULL; + RE_NODE* leftmost_node = $2; + + fail_if_too_many_ast_levels({ + yr_re_node_destroy($1); + yr_re_node_destroy($2); + yr_re_node_destroy($3); + }); + + $$ = NULL; + + /* + Some portions of the code (i.e: yr_re_split_at_chaining_point) + expect a left-unbalanced tree where the right child of a concat node + can't be another concat node. A concat node must be always the left + child of its parent if the parent is also a concat. For this reason + the can't simply create two new concat nodes arranged like this: + + concat + / \ + / \ + token's \ + subtree concat + / \ + / \ + / \ + token_sequence's token's + subtree subtree + + Instead we must insert the subtree for the first token as the + leftmost node of the token_sequence subtree. + */ + + while (leftmost_node->type == RE_NODE_CONCAT) + { + leftmost_concat = leftmost_node; + leftmost_node = leftmost_node->left; + } + + new_concat = yr_re_node_create( + RE_NODE_CONCAT, $1, leftmost_node); + + if (new_concat != NULL) + { + if (leftmost_concat != NULL) + { + leftmost_concat->left = new_concat; + $$ = yr_re_node_create(RE_NODE_CONCAT, $2, $3); + } + else + { + $$ = yr_re_node_create(RE_NODE_CONCAT, new_concat, $3); + } + } + + destroy_node_if($$ == NULL, $1); + destroy_node_if($$ == NULL, $2); + destroy_node_if($$ == NULL, $3); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + ; + + +token_sequence + : token_or_range + { + $$ = $1; + } + | token_sequence token_or_range + { + fail_if_too_many_ast_levels({ + yr_re_node_destroy($1); + yr_re_node_destroy($2); + }); + + $$ = yr_re_node_create(RE_NODE_CONCAT, $1, $2); + + destroy_node_if($$ == NULL, $1); + destroy_node_if($$ == NULL, $2); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + ; + + +token_or_range + : token + { + $$ = $1; + } + | range + { + $$ = $1; + $$->greedy = FALSE; + } + ; + + +token + : byte + { + $$ = $1; + } + | '(' + { + lex_env->inside_or++; + } + alternatives ')' + { + $$ = $3; + lex_env->inside_or--; + } + ; + + +range + : '[' _NUMBER_ ']' + { + if ($2 <= 0) + { + yyerror(yyscanner, lex_env, "invalid jump length"); + YYABORT; + } + + if (lex_env->inside_or && $2 > STRING_CHAINING_THRESHOLD) + { + yyerror(yyscanner, lex_env, "jumps over " + STR(STRING_CHAINING_THRESHOLD) + " now allowed inside alternation (|)"); + YYABORT; + } + + $$ = yr_re_node_create(RE_NODE_RANGE_ANY, NULL, NULL); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + + $$->start = (int) $2; + $$->end = (int) $2; + } + | '[' _NUMBER_ '-' _NUMBER_ ']' + { + if (lex_env->inside_or && + ($2 > STRING_CHAINING_THRESHOLD || + $4 > STRING_CHAINING_THRESHOLD) ) + { + yyerror(yyscanner, lex_env, "jumps over " + STR(STRING_CHAINING_THRESHOLD) + " now allowed inside alternation (|)"); + + YYABORT; + } + + if ($2 < 0 || $4 < 0) + { + yyerror(yyscanner, lex_env, "invalid negative jump length"); + YYABORT; + } + + if ($2 > $4) + { + yyerror(yyscanner, lex_env, "invalid jump range"); + YYABORT; + } + + $$ = yr_re_node_create(RE_NODE_RANGE_ANY, NULL, NULL); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + + $$->start = (int) $2; + $$->end = (int) $4; + } + | '[' _NUMBER_ '-' ']' + { + if (lex_env->inside_or) + { + yyerror(yyscanner, lex_env, + "unbounded jumps not allowed inside alternation (|)"); + YYABORT; + } + + if ($2 < 0) + { + yyerror(yyscanner, lex_env, "invalid negative jump length"); + YYABORT; + } + + $$ = 
yr_re_node_create(RE_NODE_RANGE_ANY, NULL, NULL); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + + $$->start = (int) $2; + $$->end = INT_MAX; + } + | '[' '-' ']' + { + if (lex_env->inside_or) + { + yyerror(yyscanner, lex_env, + "unbounded jumps not allowed inside alternation (|)"); + YYABORT; + } + + $$ = yr_re_node_create(RE_NODE_RANGE_ANY, NULL, NULL); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + + $$->start = 0; + $$->end = INT_MAX; + } + ; + + +alternatives + : tokens + { + $$ = $1; + } + | alternatives '|' tokens + { + mark_as_not_fast_regexp(); + + fail_if_too_many_ast_levels({ + yr_re_node_destroy($1); + yr_re_node_destroy($3); + }); + + $$ = yr_re_node_create(RE_NODE_ALT, $1, $3); + + destroy_node_if($$ == NULL, $1); + destroy_node_if($$ == NULL, $3); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + ; + +byte + : _BYTE_ + { + $$ = yr_re_node_create(RE_NODE_LITERAL, NULL, NULL); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + + $$->value = (int) $1; + } + | _MASKED_BYTE_ + { + uint8_t mask = (uint8_t) ($1 >> 8); + + if (mask == 0x00) + { + $$ = yr_re_node_create(RE_NODE_ANY, NULL, NULL); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + else + { + $$ = yr_re_node_create(RE_NODE_MASKED_LITERAL, NULL, NULL); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + + $$->value = $1 & 0xFF; + $$->mask = mask; + } + } + ; + +%% \ No newline at end of file diff --git a/grammar/hex_lexer.l b/grammar/hex_lexer.l new file mode 100644 index 0000000..60f0976 --- /dev/null +++ b/grammar/hex_lexer.l @@ -0,0 +1,125 @@ +/* +Copyright (c) 2013. The YARA Authors. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* Lexical analyzer for hex strings */ + +%{ + +%} + +%option reentrant bison-bridge +%option noyywrap +%option nounistd +%option noinput +%option nounput +%option never-interactive +%option yylineno +%option prefix="hex_yy" + +%option outfile="lex.yy.go" + +%option verbose +%option warn + +digit [0-9] +letter [a-zA-Z] +hexdigit [a-fA-F0-9] + +%x comment +%x range + +%% + + +{hexdigit}{2} { + return _BYTE_; +} + +{hexdigit}\? 
{ + return _MASKED_BYTE_; +} + +\?{hexdigit} { + return _MASKED_BYTE_; +} + +\?\? { + return _MASKED_BYTE_; +} + +\[ { + + BEGIN(range); + return yytext[0]; +} + +"/*"(.|\n)*"*/" // skip comments + +"//".* // skip single-line comments + +\- { + return yytext[0]; +} + +{digit}+ { + return _NUMBER_; +} + +\] { + + BEGIN(INITIAL); + return yytext[0]; +} + +[ \t\r\n] // skip whitespaces + +. { + + /* + yyerror(yyscanner, lex_env, "invalid character in hex string jump"); + yyterminate(); + */ +} + +[ \t\r\n] // skip whitespaces + +[{}()|] { // pass valid characters to the parser + + return yytext[0]; +} + +. { // reject all other characters + + /* + yyerror(yyscanner, lex_env, "invalid character in hex string"); + yyterminate(); + */ +} + +%% diff --git a/grammar/lexer.go b/grammar/lexer.go new file mode 100644 index 0000000..dfac332 --- /dev/null +++ b/grammar/lexer.go @@ -0,0 +1,2507 @@ +//line /grammar/lexer.go:2 +//line /grammar/lexer.l:33 +package grammar + +import ( + "fmt" + "io" + "log" + "os" + "math" + "strconv" + "strings" + + "github.com/VirusTotal/go-yara-parser/data" +) + +// Necessary types for flexgo +type YYtype interface{} +type YYcontext interface{} + +// Global YYtext - each action sets this to the current yytext +var YYtext string + +// Accumulators for gathering groups of tokens as text because text collection +// is like an "inclusive" start condition while the lexer requires "exclusive" +var ( + collect bool // condition text + conditionBuilder strings.Builder + collectText bool // string/regex/etc text + textBuilder strings.Builder +) + + // This comment applies to the YY_USER_ACTION macro, which is having + // a problem with comments... + // For condition, the colons and whitespace will be collected in the + // prefix and the right brace for the suffix. Use strings.TrimLeft/Right + + + + + +//line /grammar/lexer.go:43 + +// START OF SKELL ------------------------------------------------------ +// A lexical scanner generated by flexgo + +type Scanner struct { + In io.Reader + Out io.Writer + Lineno int + + Filename string + Wrap func(*Scanner) bool + IsInteractive func(io.Reader) bool + Context YYcontext + + lastAcceptingState int + lastAcceptingCpos int + debug bool + start int + stateBuf []int + statePtr int + fullState int + fullMatch int + fullLp int + lp int + lookingForTrailBegin int + holdChar byte + cBufP int + didBufferSwitchOnEof bool + textPtr int + nChars int + init bool + moreFlag bool + moreLen int + + // buffer + inputFile io.Reader + chBuf []byte // input buffer + bufPos int // current position in input buffer + bufSize int + bufNChars int + Interactive bool + atBol int // 0 (false) or 1 (true) + fillBuffer bool + bufferStatus int +} + +func NewScanner() *Scanner { + yy := Scanner{ + Lineno: 1, + In: os.Stdin, + Out: os.Stdout, + Wrap: func(yyy *Scanner) bool { return true }, + IsInteractive: func(file io.Reader) bool { return yyInteractiveDefault }, + bufSize: yyBufSize, + chBuf: make([]byte, yyBufSize+2), + start: 1, + stateBuf: make([]int, yyBufSize+2), + atBol: 1, + debug: yyFlexDebug, + fillBuffer: true, + } + return &yy +} + +func (yy *Scanner) NewFile() { + yy.Restart(yy.In) +} + +const yyEndOfBufferChar = 0 + +const yyBufSize = 32768 + +const ( + eobActEndOfFile = 0 + eobActContinueScan = 1 + eobActLastMatch = 2 +) + +const ( + yyBufferNew = 0 + yyBufferNormal = 1 + yyBufferEofPending = 2 +) + +// [1.0] the user's section 1 definitions and yytext/yyin/yyout/yy_state_type/yylineno etc. 
def's & init go here +/* Begin user sect3 */ +const yyFlexDebug = false + +const yyInteractiveDefault = false +// SKEL ---------------------------------------------------------------- + +// [1.5] DFA------------------------------------------------------------ +// SKEL ---------------------------------------------------------------- + +// [4.0] data tables for the DFA go here ------------------------------- +const yyNumRules = 76 +const yyEndOfBuffer = 77 +var yyAccept = [245]int16{ 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 77, 75, 74, 74, 50, 71, 48, 47, 75, 72, + 53, 53, 3, 75, 4, 49, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, + 52, 52, 52, 39, 40, 63, 64, 57, 76, 69, + 70, 66, 76, 76, 44, 44, 50, 8, 48, 46, + 47, 2, 42, 45, 0, 53, 0, 0, 0, 0, + 9, 5, 7, 6, 10, 49, 52, 52, 52, 52, + 27, 52, 52, 52, 52, 52, 52, 52, 52, 28, + 52, 52, 52, 29, 26, 52, 52, 52, 52, 52, + + 52, 52, 52, 0, 0, 63, 65, 60, 61, 59, + 58, 65, 69, 66, 66, 68, 67, 43, 45, 54, + 53, 56, 55, 32, 25, 33, 52, 52, 52, 52, + 52, 31, 52, 52, 52, 52, 52, 52, 52, 52, + 24, 52, 52, 52, 52, 52, 52, 52, 18, 73, + 0, 0, 0, 52, 52, 52, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 51, 52, 14, 52, 52, + 13, 52, 30, 22, 17, 0, 0, 0, 0, 0, + 73, 62, 16, 52, 52, 52, 23, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 0, 0, 52, + + 52, 52, 52, 52, 12, 38, 52, 51, 52, 20, + 52, 52, 0, 0, 0, 0, 0, 73, 52, 52, + 52, 52, 52, 41, 36, 11, 15, 0, 73, 0, + 0, 0, 52, 37, 52, 35, 19, 0, 0, 0, + 1, 52, 34, 0, + } + +var yyEc = [256]byte{ 0, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 4, 5, 6, 7, 1, 1, 1, 8, + 8, 9, 1, 1, 8, 10, 11, 12, 13, 14, + 15, 16, 16, 17, 16, 18, 19, 1, 1, 20, + 21, 22, 8, 23, 24, 25, 24, 24, 24, 24, + 26, 26, 26, 26, 27, 26, 28, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 8, 29, 8, 1, 30, 1, 31, 32, 33, 34, + + 35, 36, 37, 38, 39, 26, 26, 40, 41, 42, + 43, 44, 26, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 8, 55, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, + } + +var yyMeta = [56]byte{ 0, + 1, 2, 3, 1, 4, 1, 1, 2, 5, 6, + 7, 8, 8, 8, 8, 8, 8, 8, 8, 1, + 9, 1, 1, 10, 10, 11, 12, 12, 13, 11, + 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, + 11, 11, 12, 11, 11, 11, 11, 11, 11, 11, + 12, 11, 11, 1, 1, + } + +var yyBase = [267]uint16{ 0, + 0, 0, 53, 54, 57, 58, 0, 0, 357, 350, + 358, 646, 646, 646, 336, 646, 0, 347, 345, 53, + 53, 57, 45, 331, 49, 0, 0, 32, 308, 308, + 58, 309, 34, 42, 296, 54, 291, 287, 287, 50, + 294, 290, 285, 316, 646, 0, 646, 646, 86, 0, + 646, 59, 315, 646, 646, 314, 303, 646, 0, 646, + 314, 646, 646, 0, 104, 0, 297, 296, 97, 0, + 646, 646, 646, 646, 646, 0, 0, 280, 73, 286, + 0, 276, 270, 276, 275, 269, 273, 269, 261, 91, + 257, 256, 93, 0, 0, 263, 261, 253, 262, 248, + + 253, 251, 237, 81, 118, 0, 646, 646, 646, 646, + 646, 0, 0, 235, 646, 646, 646, 646, 0, 129, + 646, 137, 0, 0, 0, 0, 241, 121, 234, 229, + 239, 0, 233, 240, 228, 230, 117, 236, 237, 236, + 0, 217, 230, 225, 222, 227, 214, 225, 0, 646, + 250, 167, 0, 219, 212, 219, 197, 213, 201, 194, + 212, 197, 193, 214, 216, 196, 189, 0, 180, 194, + 0, 179, 0, 0, 0, 211, 208, 221, 0, 147, + 0, 646, 0, 171, 178, 172, 0, 176, 
171, 173, + 165, 177, 175, 174, 173, 160, 169, 275, 148, 166, + + 162, 154, 143, 150, 0, 0, 159, 0, 147, 0, + 155, 143, 329, 0, 383, 177, 151, 0, 144, 131, + 137, 104, 100, 0, 0, 0, 0, 152, 122, 155, + 92, 156, 60, 0, 52, 0, 0, 162, 0, 163, + 0, 46, 0, 646, 438, 451, 464, 477, 483, 488, + 496, 503, 508, 513, 524, 534, 546, 559, 571, 584, + 590, 593, 603, 616, 622, 632, + } + +var yyDef = [267]int16{ 0, + 244, 1, 245, 245, 246, 246, 247, 247, 248, 248, + 244, 244, 244, 244, 249, 244, 250, 251, 244, 244, + 252, 252, 244, 244, 244, 253, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, + 254, 254, 254, 255, 244, 256, 244, 244, 257, 258, + 244, 244, 259, 244, 244, 244, 249, 244, 250, 244, + 251, 244, 244, 260, 244, 22, 244, 244, 244, 261, + 244, 244, 244, 244, 244, 253, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, + + 254, 254, 254, 255, 244, 256, 244, 244, 244, 244, + 244, 262, 258, 244, 244, 244, 244, 244, 260, 244, + 244, 244, 261, 254, 254, 254, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 254, 254, 254, 254, 244, + 263, 264, 265, 254, 254, 254, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 263, 244, 264, 178, 178, + 178, 244, 254, 254, 254, 254, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 254, 254, 266, 178, 254, + + 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, + 254, 254, 266, 213, 263, 178, 213, 213, 254, 254, + 254, 254, 254, 254, 254, 254, 254, 263, 263, 178, + 213, 213, 254, 254, 254, 254, 254, 263, 213, 213, + 254, 254, 254, 0, 244, 244, 244, 244, 244, 244, + 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, + 244, 244, 244, 244, 244, 244, + } + +var yyNxt = [702]uint16{ 0, + 12, 13, 14, 15, 16, 17, 18, 12, 12, 19, + 20, 21, 22, 22, 22, 22, 22, 22, 22, 23, + 24, 25, 26, 27, 27, 27, 27, 27, 12, 27, + 28, 27, 29, 27, 30, 31, 32, 27, 33, 27, + 34, 35, 36, 37, 38, 39, 40, 41, 27, 42, + 43, 27, 27, 44, 45, 47, 47, 48, 48, 51, + 51, 63, 65, 64, 71, 72, 65, 52, 52, 74, + 75, 78, 91, 79, 89, 90, 92, 80, 81, 67, + 68, 49, 49, 67, 68, 53, 53, 99, 84, 94, + 108, 105, 243, 242, 100, 69, 85, 114, 95, 244, + + 86, 241, 240, 70, 115, 87, 125, 244, 122, 122, + 122, 122, 122, 122, 109, 120, 120, 120, 120, 120, + 120, 120, 120, 136, 126, 140, 151, 110, 152, 164, + 177, 165, 111, 237, 166, 150, 112, 137, 236, 141, + 120, 120, 120, 120, 120, 120, 120, 120, 122, 122, + 122, 122, 122, 122, 155, 198, 198, 199, 199, 231, + 238, 232, 239, 198, 231, 199, 232, 156, 179, 104, + 177, 231, 215, 232, 179, 235, 234, 180, 179, 179, + 179, 179, 179, 179, 179, 179, 233, 230, 227, 226, + 179, 179, 225, 224, 223, 222, 221, 179, 179, 179, + + 179, 179, 179, 220, 219, 212, 211, 210, 209, 208, + 207, 206, 205, 204, 203, 202, 201, 200, 104, 177, + 197, 181, 179, 104, 196, 195, 194, 193, 179, 166, + 166, 180, 179, 179, 179, 179, 179, 179, 179, 179, + 192, 191, 190, 189, 179, 179, 188, 187, 186, 185, + 184, 179, 179, 179, 179, 179, 179, 183, 177, 175, + 137, 174, 173, 172, 171, 170, 169, 168, 167, 163, + 162, 161, 160, 159, 158, 181, 214, 215, 157, 154, + 115, 149, 214, 216, 148, 217, 214, 214, 214, 214, + 214, 214, 214, 214, 147, 146, 145, 144, 214, 214, + + 143, 142, 139, 138, 135, 214, 214, 214, 214, 214, + 214, 134, 133, 132, 131, 130, 129, 128, 127, 124, + 121, 121, 60, 244, 118, 117, 105, 103, 102, 218, + 214, 215, 101, 98, 97, 96, 214, 216, 93, 217, + 214, 214, 214, 214, 214, 214, 214, 214, 88, 83, + 82, 
73, 214, 214, 62, 60, 58, 244, 56, 214, + 214, 214, 214, 214, 214, 56, 244, 244, 244, 244, + 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, + 244, 244, 244, 218, 215, 215, 244, 244, 244, 244, + 215, 177, 244, 228, 215, 215, 215, 215, 215, 215, + + 215, 215, 244, 244, 244, 244, 215, 215, 244, 244, + 244, 244, 244, 215, 215, 215, 215, 215, 215, 244, + 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, + 244, 244, 244, 244, 244, 244, 244, 229, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, + 46, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 57, 57, 57, 57, 57, 59, 244, 59, 59, 59, + + 61, 244, 244, 61, 244, 61, 61, 61, 66, 244, + 66, 244, 244, 244, 66, 76, 244, 76, 76, 76, + 77, 244, 77, 77, 77, 104, 104, 244, 244, 244, + 104, 104, 244, 104, 106, 106, 244, 244, 106, 106, + 106, 106, 106, 106, 106, 106, 107, 107, 107, 107, + 107, 107, 107, 107, 107, 107, 107, 107, 107, 113, + 113, 244, 113, 113, 113, 244, 113, 113, 113, 113, + 113, 116, 116, 244, 116, 116, 116, 116, 116, 116, + 116, 116, 116, 116, 119, 119, 244, 119, 119, 119, + 119, 119, 119, 119, 119, 119, 119, 123, 244, 123, + + 153, 244, 153, 176, 176, 176, 176, 176, 176, 176, + 176, 176, 176, 176, 176, 176, 178, 178, 178, 178, + 178, 178, 178, 178, 178, 178, 178, 178, 178, 182, + 244, 182, 213, 213, 213, 213, 213, 213, 213, 213, + 213, 213, 213, 213, 213, 11, 244, 244, 244, 244, + 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, + 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, + 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, + 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, + 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, + + 244, + } + +var yyChk = [702]int16{ 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 3, 4, 3, 4, 5, + 6, 20, 21, 20, 23, 23, 22, 5, 6, 25, + 25, 28, 34, 28, 33, 33, 34, 28, 28, 21, + 21, 3, 4, 22, 22, 5, 6, 40, 31, 36, + 49, 104, 242, 235, 40, 21, 31, 52, 36, 22, + + 31, 233, 231, 21, 52, 31, 79, 22, 69, 69, + 69, 69, 69, 69, 49, 65, 65, 65, 65, 65, + 65, 65, 65, 90, 79, 93, 105, 49, 105, 137, + 229, 137, 49, 223, 137, 104, 49, 90, 222, 93, + 120, 120, 120, 120, 120, 120, 120, 120, 122, 122, + 122, 122, 122, 122, 128, 180, 199, 180, 199, 217, + 228, 217, 228, 230, 232, 230, 232, 128, 152, 152, + 238, 240, 238, 240, 152, 221, 220, 152, 152, 152, + 152, 152, 152, 152, 152, 152, 219, 216, 212, 211, + 152, 152, 209, 207, 204, 203, 202, 152, 152, 152, + + 152, 152, 152, 201, 200, 197, 196, 195, 194, 193, + 192, 191, 190, 189, 188, 186, 185, 184, 177, 176, + 172, 152, 178, 178, 170, 169, 167, 166, 178, 165, + 164, 178, 178, 178, 178, 178, 178, 178, 178, 178, + 163, 162, 161, 160, 178, 178, 159, 158, 157, 156, + 155, 178, 178, 178, 178, 178, 178, 154, 151, 148, + 147, 146, 145, 144, 143, 142, 140, 139, 138, 136, + 135, 134, 133, 131, 130, 178, 198, 198, 129, 127, + 114, 103, 198, 198, 102, 198, 198, 198, 198, 198, + 198, 198, 198, 198, 101, 100, 99, 98, 198, 198, + + 97, 96, 92, 91, 89, 198, 198, 198, 198, 198, + 198, 88, 87, 86, 85, 84, 83, 82, 80, 78, + 68, 67, 61, 57, 56, 53, 44, 43, 42, 198, + 213, 213, 41, 39, 38, 37, 213, 213, 35, 213, + 213, 213, 213, 213, 213, 213, 213, 213, 32, 30, + 29, 24, 213, 213, 19, 18, 15, 11, 10, 213, + 213, 213, 213, 213, 213, 9, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 213, 215, 215, 0, 0, 0, 0, + 215, 215, 0, 
215, 215, 215, 215, 215, 215, 215, + + 215, 215, 0, 0, 0, 0, 215, 215, 0, 0, + 0, 0, 0, 215, 215, 215, 215, 215, 215, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 215, 245, 245, + 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, + 245, 246, 246, 246, 246, 246, 246, 246, 246, 246, + 246, 246, 246, 246, 247, 247, 247, 247, 247, 247, + 247, 247, 247, 247, 247, 247, 247, 248, 248, 248, + 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, + 249, 249, 249, 249, 249, 250, 0, 250, 250, 250, + + 251, 0, 0, 251, 0, 251, 251, 251, 252, 0, + 252, 0, 0, 0, 252, 253, 0, 253, 253, 253, + 254, 0, 254, 254, 254, 255, 255, 0, 0, 0, + 255, 255, 0, 255, 256, 256, 0, 0, 256, 256, + 256, 256, 256, 256, 256, 256, 257, 257, 257, 257, + 257, 257, 257, 257, 257, 257, 257, 257, 257, 258, + 258, 0, 258, 258, 258, 0, 258, 258, 258, 258, + 258, 259, 259, 0, 259, 259, 259, 259, 259, 259, + 259, 259, 259, 259, 260, 260, 0, 260, 260, 260, + 260, 260, 260, 260, 260, 260, 260, 261, 0, 261, + + 262, 0, 262, 263, 263, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 264, 264, 264, 264, + 264, 264, 264, 264, 264, 264, 264, 264, 264, 265, + 0, 265, 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 266, 266, 244, 244, 244, 244, 244, + 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, + 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, + 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, + 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, + 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, + + 244, + } + +/* Table of booleans, true if rule could match eol. */ +var yyRuleCanMatchEol = [77]int32{ 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, }; + +//line /grammar/lexer.l:1 +/* +Copyright (c) 2007-2013. The YARA Authors. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ +/* Lexical analyzer for YARA */ + +//line /grammar/lexer.l:72 + + + +// Define a constant for end-of-file +const eof = 0 + +// These globals must be accessible to the goyacc parser. +// Bear in mind that the xx prefix is used because the parser is generated +// with this prefix in order to avoid name collisions. +var yylval *xxSymType +var err error // Actually, this is global for lexer action access. + + + + + +//line /grammar/lexer.go:492 +// SKEL ---------------------------------------------------------------- + +const yyInitial = 0 +const str = 1 +const regexp = 2 +const include = 3 +const comment = 4 + +const yyReadBufSize = 16384 + +func (yy *Scanner) input(offset, maxRead int) int { + +// [5.0] fread()/read() definition of yy_INPUT goes here --------------- +// nothing here, all moved to skeleton +// SKEL ---------------------------------------------------------------- + + if yy.Interactive { + b := make([]byte, 1) + var n int + for n = 0; n < maxRead; n++ { + nn, err := yy.inputFile.Read(b) + if err != nil && err != io.EOF { + log.Panicln("Reading 1 byte:", err) + } + if nn < 1 { + break + } + yy.chBuf[offset+n] = b[0] + if b[0] == '\n' { + n++ + break + } + } + return n + } + + n, err := yy.inputFile.Read(yy.chBuf[offset:offset+maxRead]) + if err != nil && err != io.EOF { + log.Panicf("Reading %d bytes: %v\n", maxRead, err) + } + return n +} + +/* [6.0] YY_RULE_SETUP definition goes here --------------------------*/ + +// SKEL ---------------------------------------------------------------- + +// The main scanner function which does all the work. +func (yy *Scanner) Lex() YYtype { + var yyCurrentState int + var yyBp, yyCp int + var yyAct int + var yytext []byte + var yyleng int + var yylineno int + _ = yytext + _ = yyleng + _ = yylineno + + if !yy.init { + yy.init = true + + if yy.In == nil { + yy.In = os.Stdin + } + if yy.Out == nil { + yy.Out = os.Stdout + } + yy.initBuffer(yy.In) + yy.loadBufferState() + } + + yyout := yy.Out + _ = yyout + +// [7.0] user's declarations go here ----------------------------------- +//line /grammar/lexer.l:115 + + +//line /grammar/lexer.go:572 +// SKEL ---------------------------------------------------------------- + + for { // loops until end-of-file is reached + +// [8.0] yy''more-related code goes here ------------------------------- +// SKEL ---------------------------------------------------------------- + + yyCp = yy.cBufP + + /* Support of yytext. */ + yy.chBuf[yyCp] = yy.holdChar + + // yyBp points to the position in yy_ch_buf of the start of + // the current run. 
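+	// (Once a match is accepted, yy.textPtr is set from yyBp and the
+	// token text is sliced out of the buffer as chBuf[textPtr:cp], so
+	// yyBp effectively marks where yytext will begin.)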
+ yyBp = yyCp + +// [9.0] code to set up and find next match goes here ------------------ + yyCurrentState = yy.start +yyMatch: + for { + yyC := int(yyEc[yy.chBuf[yyCp]]) + if yyAccept[yyCurrentState] != 0 { + yy.lastAcceptingState = yyCurrentState + yy.lastAcceptingCpos = yyCp + } + for int(yyChk[int(yyBase[yyCurrentState])+yyC]) != yyCurrentState { + yyCurrentState = int(yyDef[yyCurrentState]) + if yyCurrentState >= 245 { + yyC = int(yyMeta[yyC]) + } + } + yyCurrentState = int(yyNxt[int(yyBase[yyCurrentState])+yyC]) + yyCp++ + if yyCurrentState == 244 { + break + } + } + yyCp = yy.lastAcceptingCpos + yyCurrentState = yy.lastAcceptingState +// SKEL ---------------------------------------------------------------- + + yyFindAction: + +// [10.0] code to find the action number goes here --------------------- + yyAct = int(yyAccept[yyCurrentState]) +// SKEL ---------------------------------------------------------------- + + yy.textPtr = yyBp + +// [2.0] code to fiddle yytext and yyleng for yy''more() goes here ------- + yyleng = yyCp - yyBp +// SKEL ---------------------------------------------------------------- + + yy.holdChar = yy.chBuf[yyCp] + yy.chBuf[yyCp] = 0 + +// [3.0] code to copy yytext_ptr to yytext[] goes here, if %array ------ +// SKEL ---------------------------------------------------------------- + + yy.cBufP = yyCp + yytext = yy.chBuf[yy.textPtr:yyCp] + +// [11.0] code for yylineno update goes here --------------------------- + + if yyAct != yyEndOfBuffer && yyRuleCanMatchEol[yyAct] != 0 { + for yyl := 0; yyl < yyleng; yyl++ { + if yytext[yyl] == '\n' { + yy.Lineno++ + } + } + } + +// SKEL ---------------------------------------------------------------- + + doAction: // This label is used only to access EOF actions. + +// [12.0] debug code goes here ----------------------------------------- +// SKEL ---------------------------------------------------------------- + + switch yyAct { // beginning of action switch + +// [13.0] actions go here ---------------------------------------------- + case 0: // must back up + // undo the effects of yy_DO_BEFORE_ACTION + yy.chBuf[yyCp] = yy.holdChar + yyCp = yy.lastAcceptingCpos + yyCurrentState = yy.lastAcceptingState + goto yyFindAction + +case 1: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + +//line /grammar/lexer.l:117 +{ + collect = true + conditionBuilder.Reset() + return _CONDITION_ +} +case 2: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:123 +{ return _DOT_DOT_; } +case 3: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:124 +{ return _LT_; } +case 4: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:125 +{ return _GT_; } +case 5: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:126 +{ return _LE_; } +case 6: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + 
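+	// The block above is the per-rule preamble: the generated yylineno
+	// bookkeeping plus the inlined YY_USER_ACTION from lexer.l, which
+	// exports YYtext and feeds the condition/string accumulators. The
+	// code after the //line directive below is the rule's own action.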
+//line /grammar/lexer.l:127 +{ return _GE_; } +case 7: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:128 +{ return _EQ_; } +case 8: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:129 +{ return _NEQ_; } +case 9: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:130 +{ return _SHIFT_LEFT_; } +case 10: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:131 +{ return _SHIFT_RIGHT_; } +case 11: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:132 +{ return _PRIVATE_; } +case 12: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:133 +{ return _GLOBAL_; } +case 13: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:134 +{ return _RULE_; } +case 14: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:135 +{ return _META_; } +case 15: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:136 +{ return _STRINGS_; } +case 16: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:137 +{ return _ASCII_; } +case 17: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:138 +{ return _WIDE_; } +case 18: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:139 +{ return _XOR_; } +case 19: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:140 +{ return _FULLWORD_; } +case 20: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:141 +{ return _NOCASE_; } +case 21: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:142 +{ return _CONDITION_; } +case 22: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + 
} + + +//line /grammar/lexer.l:143 +{ return _TRUE_; } +case 23: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:144 +{ return _FALSE_; } +case 24: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:145 +{ return _NOT_; } +case 25: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:146 +{ return _AND_; } +case 26: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:147 +{ return _OR_; } +case 27: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:148 +{ return _AT_; } +case 28: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:149 +{ return _IN_; } +case 29: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:150 +{ return _OF_; } +case 30: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:151 +{ return _THEM_; } +case 31: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:152 +{ return _FOR_; } +case 32: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:153 +{ return _ALL_; } +case 33: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:154 +{ return _ANY_; } +case 34: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:155 +{ return _ENTRYPOINT_; } +case 35: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:156 +{ return _FILESIZE_; } +case 36: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:157 +{ return _MATCHES_; } +case 37: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:158 +{ return _CONTAINS_; } +case 38: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line 
/grammar/lexer.l:159 +{ return _IMPORT_; } +case 39: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:161 +{ return _LBRACE_; } +case 40: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:162 +{ + if collect { + collect = false + } + return _RBRACE_; + } +case 41: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:168 +{ return _INCLUDE_; } +case 42: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:170 +{ yy.start = 1 + 2* (comment); } +case 43: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:171 +{ yy.start = 1 + 2* (yyInitial ); } +case 44: +/* rule 44 can match eol */ + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:172 +{ /* skip comments */ } +case 45: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:175 +{ /* skip single-line comments */ } +case (yyEndOfBuffer + yyInitial + 1) : + fallthrough +case (yyEndOfBuffer + str + 1) : + fallthrough +case (yyEndOfBuffer + regexp + 1) : + fallthrough +case (yyEndOfBuffer + include + 1) : + fallthrough +case (yyEndOfBuffer + comment + 1) : +//line /grammar/lexer.l:177 +{ return eof } +case 46: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:180 +{ + yylval.s = string(yytext) + return _STRING_IDENTIFIER_WITH_WILDCARD_; +} +case 47: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:186 +{ + yylval.s = string(yytext) + return _STRING_IDENTIFIER_; +} +case 48: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:192 +{ + return _STRING_COUNT_; +} +case 49: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:197 +{ + return _STRING_OFFSET_; +} +case 50: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:202 +{ + return _STRING_LENGTH_; +} +case 51: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:207 +{ + return _INTEGER_FUNCTION_; +} +case 52: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if 
collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:212 +{ + yylval.s = string(yytext) + return _IDENTIFIER_; +} +case 53: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:218 +{ + var err error + s := strings.TrimRight(YYtext, "MKB") + yylval.i64, err = strconv.ParseInt(s, 10, 64) + if err != nil { + panic(fmt.Errorf("Number conversion error: %s", err)) + } + + if strings.HasSuffix(YYtext, "KB") { + if yylval.i64 > math.MaxInt64 / 1024 { + err := fmt.Errorf("Integer overflow: %s; max %d", + YYtext, math.MaxInt64) + panic(err) + } else { + yylval.i64 *= 1024 + } + } else if strings.HasSuffix(YYtext, "MB") { + if yylval.i64 > math.MaxInt64 / 1048576 { + err := fmt.Errorf("Integer overflow: %s; max %d", + YYtext, math.MaxInt64) + panic(err) + } else { + yylval.i64 *= 1048576 + } + } + return _NUMBER_; +} +case 54: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:246 +{ + return _DOUBLE_; +} +case 55: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:250 +{ + var err error + yylval.i64, err = strconv.ParseInt(YYtext, 0, 64) + if err != nil { + panic(fmt.Errorf("Number conversion error: %s", err)) + } + + return _NUMBER_; +} +case 56: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:260 +{ + var err error + s := strings.TrimLeft(YYtext, "0o") + yylval.i64, err = strconv.ParseInt(s, 8, 64) + if err != nil { + panic(fmt.Errorf("Number conversion error: %s", err)) + } + return _NUMBER_; +} +case 57: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:271 +{ /* saw closing quote - all done */ + + // NOTE: textBuilder.String() will end with `"` char + collectText = false + yylval.s = strings.TrimSuffix(textBuilder.String(), `"`) + + yy.start = 1 + 2* (yyInitial ); + + return _TEXT_STRING_; +} +case 58: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:283 +{ +} +case 59: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:287 +{ +} +case 60: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:291 +{ +} +case 61: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:295 +{ +} +case 62: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:299 +{ +} +case 63: + + yylineno = yy.Lineno + YYtext 
= string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:303 +{ } +case 64: +/* rule 64 can match eol */ + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:306 +{ + panic(fmt.Errorf("unterminated string")) +} +case 65: +/* rule 65 can match eol */ + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:311 +{ + panic(fmt.Errorf("illegal escape sequence")) +} +case 66: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:316 +{ + collectText = false + + txt := strings.TrimSuffix(textBuilder.String(), YYtext) + + var mods data.StringModifiers + + for _, c := range YYtext { + switch c { + case 'i': + mods.I = true + + case 's': + mods.S = true + + case '/': + // Ignore + + default: + // Should be impossible + panic(fmt.Errorf("Invalid regex modifier: %c", c)) + } + } + + yylval.reg = regexPair { + txt, + mods, + } + + yy.start = 1 + 2* (yyInitial ); + return _REGEXP_; +} +case 67: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:350 +{ +} +case 68: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:354 +{ +} +case 69: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:358 +{ } +case 70: +/* rule 70 can match eol */ + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:361 +{ + panic(fmt.Errorf("unterminated regular expression")) +} +case 71: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:366 +{ + collectText = true + textBuilder.Reset() + yy.start = 1 + 2* (str); +} +case 72: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:373 +{ + collectText = true + textBuilder.Reset() + yy.start = 1 + 2* (regexp); +} +case 73: +/* rule 73 can match eol */ + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:380 +{ + // Match hex-digits with whitespace or comments. The latter are stripped + // out by hex_lexer.l + + // NOTE: The above comment may not apply. 
We plan to not use hex_lexer.l + + // No need to collect like str and regexp start conditions + yylval.s = strings.Trim(YYtext, "{}") + + return _HEX_STRING_; +} +case 74: +/* rule 74 can match eol */ + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:393 +/* skip whitespace */ +case 75: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:395 +{ + + r := int(yytext[0]) + + if r >= 32 && r < 127 { + return r + } + + panic(fmt.Errorf("non-ascii byte '%d'", r)) +} +case 76: + + yylineno = yy.Lineno + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } + + +//line /grammar/lexer.l:406 +yyout.Write(yytext) +//line /grammar/lexer.go:2044 +// SKEL ---------------------------------------------------------------- + + case yyEndOfBuffer: + /* Amount of text matched not including the EOB char. */ + yyAmountOfMatchedText := yyCp - yy.textPtr - 1 + + /* Undo the effects of yy_DO_BEFORE_ACTION. */ + yy.chBuf[yyCp] = yy.holdChar + + if yy.bufferStatus == yyBufferNew { + /* We're scanning a new file or input source. It's + * possible that this happened because the user + * just pointed yyin at a new source and called + * yylex(). If so, then we have to assure + * consistency between yy_CURRENT_BUFFER and our + * globals. Here is the right place to do so, because + * this is the first action (other than possibly a + * back-up) that will match for the new input source. + */ + yy.nChars = yy.bufNChars + yy.inputFile = yy.In + yy.bufferStatus = yyBufferNormal + } + + /* Note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the + * end-of-buffer state). Contrast this with the test + * in input(). + */ + if yy.cBufP <= yy.nChars { + /* This was really a NUL. */ + var yyNextState int + + yy.cBufP = yy.textPtr + yyAmountOfMatchedText + + yyCurrentState = yy.getPreviousState() + + /* Okay, we're now positioned to make the NUL + * transition. We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we don't + * want to build jamming into it because then it + * will run more slowly). + */ + + yyNextState = yy.tryNulTrans(yyCurrentState) + + yyBp = yy.textPtr + 0 + + if yyNextState != 0 { + /* Consume the NUL. */ + yy.cBufP++ + yyCp = yy.cBufP + yyCurrentState = yyNextState + goto yyMatch + } else { + +// [14.0] code to do back-up for compressed tables and set up yy_cp goes here + yyCp = yy.lastAcceptingCpos + yyCurrentState = yy.lastAcceptingState +// SKEL ---------------------------------------------------------------- + + goto yyFindAction + } + + } else { + + switch yy.getNextBuffer() { + case eobActEndOfFile: + yy.didBufferSwitchOnEof = false + + if yy.Wrap(yy) { + // Note: because we've taken care in + // yy_get_next_buffer() to have set up + // yytext, we can now set up + // yy.cBufP so that if some total + // hoser (like flex itself) wants to + // call the scanner after we return the + // yy_NULL, it'll still work - another + // yy_NULL will get returned. 
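+				// Start conditions are encoded as yy.start = 1 + 2*sc, so
+				// ((yy.start - 1) / 2) recovers the active condition and the
+				// computed yyAct lands on the matching <<EOF>> case above,
+				// all of which return eof.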
+ yy.cBufP = yy.textPtr + 0 + + yyAct = (yyEndOfBuffer + ((yy.start - 1) / 2) + 1) + goto doAction + } else { + if !yy.didBufferSwitchOnEof { + yy.NewFile() + } + } + case eobActContinueScan: + yy.cBufP = yy.textPtr + yyAmountOfMatchedText + + yyCurrentState = yy.getPreviousState() + + yyCp = yy.cBufP + yyBp = yy.textPtr + 0 + goto yyMatch + case eobActLastMatch: + yy.cBufP = yy.nChars + + yyCurrentState = yy.getPreviousState() + + yyCp = yy.cBufP + yyBp = yy.textPtr + 0 + goto yyFindAction + } + } + + default: + log.Panicln("fatal flex scanner internal error--no action found:", yyAct) + } // end of action switch + } // end of scanning one token + var yyvalue YYtype + return yyvalue +} // end of yylex + +/* yy_get_next_buffer - try to read in a new buffer + * + * Returns a code representing an action: + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ +func (yy *Scanner) getNextBuffer() int { + + var numberToMove int + var retval int + + if yy.cBufP > yy.nChars+1 { + log.Panic("fatal flex scanner internal error--end of buffer missed") + } + + if !yy.fillBuffer { + // Don't try to fill the buffer, so this is an EOF. + if yy.cBufP-yy.textPtr-0 == 1 { + // We matched a single character, the EOB, so + // treat this as a final EOF. + return eobActEndOfFile + } else { + // We matched some text prior to the EOB, first + // process it. + return eobActLastMatch + } + } + + // Try to read more data. + + // First move last chars to start of buffer. + numberToMove = yy.cBufP - yy.textPtr - 1 + + copy(yy.chBuf, yy.chBuf[yy.textPtr:yy.textPtr+numberToMove]) + + if yy.bufferStatus == yyBufferEofPending { + // don't do the read, it's not guaranteed to return an EOF, + // just force an EOF + yy.nChars = 0 + yy.bufNChars = 0 + } else { + numToRead := yy.bufSize - numberToMove - 1 + + for numToRead <= 0 { + // Not enough room in the buffer - grow it. + + yyCBufPOffset := yy.cBufP + + new_size := yy.bufSize * 2 + + if new_size <= 0 { + yy.bufSize += yy.bufSize / 8 + } else { + yy.bufSize *= 2 + } + + // Include room in for 2 EOB chars. + bb := make([]byte, yy.bufSize+2-len(yy.chBuf)) + yy.chBuf = append(yy.chBuf, bb...) + + yy.cBufP = yyCBufPOffset + + numToRead = yy.bufSize - numberToMove - 1 + + } + + if numToRead > yyReadBufSize { + numToRead = yyReadBufSize + } + + // Read in more data. + yy.nChars = yy.input(numberToMove, numToRead) + yy.bufNChars = yy.nChars + } + + if yy.nChars == 0 { + if numberToMove == 0 { + retval = eobActEndOfFile + yy.Restart(yy.In) + } else { + retval = eobActLastMatch + yy.bufferStatus = yyBufferEofPending + } + } else { + retval = eobActContinueScan + } + + if yy.nChars+numberToMove > yy.bufSize { + // Extend the array by 50%, plus the number we really need. * + newSize := yy.nChars + numberToMove + (yy.nChars >> 1) + if leng := len(yy.chBuf); leng < newSize { + chBuf := make([]byte, newSize-leng) + yy.chBuf = append(yy.chBuf, chBuf...) + } + } + + yy.nChars += numberToMove + //yy.bufNChars += numberToMove // TODO: missing in C skel, bug? 
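+	// Re-plant the two end-of-buffer sentinels after the data just read:
+	// the first NUL forces a transition into the end-of-buffer state and
+	// the second jams the DFA there (see the comment in flushBuffer).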
+ yy.chBuf[yy.nChars] = yyEndOfBufferChar + yy.chBuf[yy.nChars+1] = yyEndOfBufferChar + + yy.textPtr = 0 + + return retval +} + +/* yy_get_previous_state - get the state just before the EOB char was reached */ +func (yy *Scanner) getPreviousState() int { + + var yyCurrentState int + var yyCp int + +// [15.0] code to get the start state into yy_current_state goes here -- + yyCurrentState = yy.start +// SKEL ---------------------------------------------------------------- + + for yyCp = yy.textPtr + 0 ; yyCp < yy.cBufP; yyCp++ { + +// [16.0] code to find the next state goes here ------------------------ + yyC := yyIfElse(yy.chBuf[yyCp] != 0, int(yyEc[yy.chBuf[yyCp]]), 1) + if yyAccept[yyCurrentState] != 0 { + yy.lastAcceptingState = yyCurrentState + yy.lastAcceptingCpos = yyCp + } + for int(yyChk[int(yyBase[yyCurrentState])+yyC]) != yyCurrentState { + yyCurrentState = int(yyDef[yyCurrentState]) + if yyCurrentState >= 245 { + yyC = int(yyMeta[yyC]) + } + } + yyCurrentState = int(yyNxt[int(yyBase[yyCurrentState])+yyC]) +// SKEL ---------------------------------------------------------------- + + } + return yyCurrentState +} + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ +func (yy *Scanner) tryNulTrans(yyCurrentState int) int { + + var yyIsJam bool + var yyCp int + _ = yyCp + +// [17.0] code to find the next state, and perhaps do backing up, goes here + yyCp = yy.cBufP + + yyC := 1 + if yyAccept[yyCurrentState] != 0 { + yy.lastAcceptingState = yyCurrentState + yy.lastAcceptingCpos = yyCp + } + for int(yyChk[int(yyBase[yyCurrentState])+yyC]) != yyCurrentState { + yyCurrentState = int(yyDef[yyCurrentState]) + if yyCurrentState >= 245 { + yyC = int(yyMeta[yyC]) + } + } + yyCurrentState = int(yyNxt[int(yyBase[yyCurrentState])+yyC]) + if yyCurrentState == 244 { + yyIsJam = true + } +// SKEL ---------------------------------------------------------------- + + if yyIsJam { + return 0 + } + return yyCurrentState +} + +func (yy *Scanner) Input() (byte, error) { + + yy.chBuf[yy.cBufP] = yy.holdChar + + if yy.chBuf[yy.cBufP] == yyEndOfBufferChar { + // yy_c_buf_p now points to the character we want to return. + // If this occurs *before* the EOB characters, then it's a + // valid NUL; if not, then we've hit the end of the buffer. + if yy.cBufP < yy.nChars { + // This was really a NUL. + yy.chBuf[yy.cBufP] = 0 + } else { + // need more input + offset := yy.cBufP - yy.textPtr + yy.cBufP++ + + switch yy.getNextBuffer() { + case eobActLastMatch: + /* This happens because yy_g_n_b() + * sees that we've accumulated a + * token and flags that we need to + * try matching the token before + * proceeding. But for input(), + * there's no matching to consider. + * So convert the EOB_ACT_LAST_MATCH + * to EOB_ACT_END_OF_FILE. + */ + + /* Reset buffer status. */ + yy.Restart(yy.In) + + fallthrough + + case eobActEndOfFile: + if yy.Wrap(yy) { + return 0, io.EOF + } + + if !yy.didBufferSwitchOnEof { + yy.Restart(yy.In) + } + + return yy.Input() + + case eobActContinueScan: + yy.cBufP = yy.textPtr + offset + } + } + } + + c := yy.chBuf[yy.cBufP] + yy.chBuf[yy.cBufP] = 0 // preserve yytext + yy.cBufP++ + yy.holdChar = yy.chBuf[yy.cBufP] + +// [19.0] update BOL and yylineno -------------------------------------- + if c == '\n' { + yy.Lineno++ + } +// SKEL ---------------------------------------------------------------- + +return c, nil +} + +/** Immediately switch to a different input stream. 
+ * @param input_file A readable stream. + * + * @note This function does not reset the start condition to @c yyInitial . + */ +func (yy *Scanner) Restart(input_file io.Reader) { + yy.initBuffer(input_file) + yy.loadBufferState() +} + +func (yy *Scanner) loadBufferState() { + yy.nChars = yy.bufNChars + yy.cBufP = yy.bufPos + yy.textPtr = yy.cBufP + yy.In = yy.inputFile + yy.holdChar = yy.chBuf[yy.cBufP] +} + +/* Initializes or reinitializes a buffer. + * This function is sometimes called more than once on the same buffer, + * such as during a yyrestart() or at EOF. + */ +func (yy *Scanner) initBuffer(file io.Reader) { + + yy.flushBuffer() + + yy.inputFile = file + + yy.fillBuffer = true + + yy.Interactive = yy.IsInteractive(file) + +} + +/** Discard all buffered characters. On the next scan, YY_INPUT will be called. + * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER. + * + */ +func (yy *Scanner) flushBuffer() { + + yy.bufNChars = 0 + + /* We always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. + */ + yy.chBuf[0] = yyEndOfBufferChar + yy.chBuf[1] = yyEndOfBufferChar + + yy.bufPos = 0 + + yy.atBol = 1 + yy.bufferStatus = yyBufferNew + + yy.loadBufferState() +} + +func yyIfElse(b bool, i1, i2 int) int { + if b { + return i1 + } + return i2 +} + +func YYmain(filenames ...string) (interface{}, error) { + + var errval error + + yy := NewScanner() + + yy.Filename = "" + + if len(filenames) > 0 { + yy.Filename = filenames[0] + yy.In, errval = os.Open(yy.Filename) + if errval != nil { + return nil, errval + } + yy.Wrap = func(yyy *Scanner) bool { + if len(filenames) == 0 { + // should not happen + return true + } + yyy.In.(*os.File).Close() + filenames = filenames[1:] + if len(filenames) == 0 { + return true + } + yyy.Filename = filenames[0] + yyy.In, errval = os.Open(yyy.Filename) + if errval != nil { + return true + } + return false + } + } + + return yy.Lex(), errval + +} + +// END OF SKELL -------------------------------------------------------- +//line /grammar/lexer.l:406 + + + diff --git a/grammar/lexer.l b/grammar/lexer.l new file mode 100644 index 0000000..d8b584f --- /dev/null +++ b/grammar/lexer.l @@ -0,0 +1,406 @@ +/* +Copyright (c) 2007-2013. The YARA Authors. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* Lexical analyzer for YARA */ + +%top{ +package grammar + +import ( + "fmt" + "io" + "log" + "os" + "math" + "strconv" + "strings" + + "github.com/VirusTotal/go-yara-parser/data" +) + +// Necessary types for flexgo +type YYtype interface{} +type YYcontext interface{} + +// Global YYtext - each action sets this to the current yytext +var YYtext string + +// Accumulators for gathering groups of tokens as text because text collection +// is like an "inclusive" start condition while the lexer requires "exclusive" +var ( + collect bool // condition text + conditionBuilder strings.Builder + collectText bool // string/regex/etc text + textBuilder strings.Builder +) + + // This comment applies to the YY_USER_ACTION macro, which is having + // a problem with comments... + // For condition, the colons and whitespace will be collected in the + // prefix and the right brace for the suffix. Use strings.TrimLeft/Right + + +} + +%{ + +YY_USER_ACTION( + YYtext = string(yytext) + + if collect { + conditionBuilder.Write(yytext) + } + + if collectText { + textBuilder.Write(yytext) + } +) + +// Define a constant for end-of-file +const eof = 0 + +// These globals must be accessible to the goyacc parser. +// Bear in mind that the xx prefix is used because the parser is generated +// with this prefix in order to avoid name collisions. +var yylval *xxSymType +var err error // Actually, this is global for lexer action access. + +%} + +%option reentrant bison-bridge +%option nounput +%option never-interactive +%option yylineno +%option prefix="yara_yy" + +%option verbose +%option warn + +%x str +%x regexp +%x include +%x comment + +digit [0-9] +letter [a-zA-Z] +hexdigit [a-fA-F0-9] +octdigit [0-7] + +%% + +"condition" { + collect = true + conditionBuilder.Reset() + return _CONDITION_ +} + +".." 
{ return _DOT_DOT_; } +"<" { return _LT_; } +">" { return _GT_; } +"<=" { return _LE_; } +">=" { return _GE_; } +"==" { return _EQ_; } +"!=" { return _NEQ_; } +"<<" { return _SHIFT_LEFT_; } +">>" { return _SHIFT_RIGHT_; } +"private" { return _PRIVATE_; } +"global" { return _GLOBAL_; } +"rule" { return _RULE_; } +"meta" { return _META_; } +"strings" { return _STRINGS_; } +"ascii" { return _ASCII_; } +"wide" { return _WIDE_; } +"xor" { return _XOR_; } +"fullword" { return _FULLWORD_; } +"nocase" { return _NOCASE_; } +"condition" { return _CONDITION_; } +"true" { return _TRUE_; } +"false" { return _FALSE_; } +"not" { return _NOT_; } +"and" { return _AND_; } +"or" { return _OR_; } +"at" { return _AT_; } +"in" { return _IN_; } +"of" { return _OF_; } +"them" { return _THEM_; } +"for" { return _FOR_; } +"all" { return _ALL_; } +"any" { return _ANY_; } +"entrypoint" { return _ENTRYPOINT_; } +"filesize" { return _FILESIZE_; } +"matches" { return _MATCHES_; } +"contains" { return _CONTAINS_; } +"import" { return _IMPORT_; } + +"{" { return _LBRACE_; } +"}" { + if collect { + collect = false + } + return _RBRACE_; + } +"include" { return _INCLUDE_; } + +"/*" { BEGIN (comment); } +"*/" { BEGIN (INITIAL); } +(.|\n) { /* skip comments */ } + + +"//"[^\n]* { /* skip single-line comments */ } + +<> { return eof } + + +$({letter}|{digit}|_)*"*" { + yylval.s = string(yytext) + return _STRING_IDENTIFIER_WITH_WILDCARD_; +} + + +$({letter}|{digit}|_)* { + yylval.s = string(yytext) + return _STRING_IDENTIFIER_; +} + + +#({letter}|{digit}|_)* { + return _STRING_COUNT_; +} + + +@({letter}|{digit}|_)* { + return _STRING_OFFSET_; +} + + +!({letter}|{digit}|_)* { + return _STRING_LENGTH_; +} + + +u?int(8|16|32)(be)? { + return _INTEGER_FUNCTION_; +} + + +({letter}|_)({letter}|{digit}|_)* { + yylval.s = string(yytext) + return _IDENTIFIER_; +} + + +{digit}+(MB|KB){0,1} { + var err error + s := strings.TrimRight(YYtext, "MKB") + yylval.i64, err = strconv.ParseInt(s, 10, 64) + if err != nil { + panic(fmt.Errorf("Number conversion error: %s", err)) + } + + if strings.HasSuffix(YYtext, "KB") { + if yylval.i64 > math.MaxInt64 / 1024 { + err := fmt.Errorf("Integer overflow: %s; max %d", + YYtext, math.MaxInt64) + panic(err) + } else { + yylval.i64 *= 1024 + } + } else if strings.HasSuffix(YYtext, "MB") { + if yylval.i64 > math.MaxInt64 / 1048576 { + err := fmt.Errorf("Integer overflow: %s; max %d", + YYtext, math.MaxInt64) + panic(err) + } else { + yylval.i64 *= 1048576 + } + } + return _NUMBER_; +} + +{digit}+"."{digit}+ { + return _DOUBLE_; +} + +0x{hexdigit}+ { + var err error + yylval.i64, err = strconv.ParseInt(YYtext, 0, 64) + if err != nil { + panic(fmt.Errorf("Number conversion error: %s", err)) + } + + return _NUMBER_; +} + +0o{octdigit}+ { + var err error + s := strings.TrimLeft(YYtext, "0o") + yylval.i64, err = strconv.ParseInt(s, 8, 64) + if err != nil { + panic(fmt.Errorf("Number conversion error: %s", err)) + } + return _NUMBER_; +} + + +\" { /* saw closing quote - all done */ + + // NOTE: textBuilder.String() will end with `"` char + collectText = false + yylval.s = strings.TrimSuffix(textBuilder.String(), `"`) + + BEGIN (INITIAL); + + return _TEXT_STRING_; +} + + +\\t { +} + + +\\n { +} + + +\\\" { +} + + +\\\\ { +} + + +\\x{hexdigit}{2} { +} + + +[^\\\n\"]+ { } + + +\n { + panic(fmt.Errorf("unterminated string")) +} + + +\\(.|\n) { + panic(fmt.Errorf("illegal escape sequence")) +} + + +\/i?s? 
{ + collectText = false + + txt := strings.TrimSuffix(textBuilder.String(), YYtext) + + var mods data.StringModifiers + + for _, c := range YYtext { + switch c { + case 'i': + mods.I = true + + case 's': + mods.S = true + + case '/': + // Ignore + + default: + // Should be impossible + panic(fmt.Errorf("Invalid regex modifier: %c", c)) + } + } + + yylval.reg = regexPair { + txt, + mods, + } + + BEGIN (INITIAL); + return _REGEXP_; +} + + +\\\/ { +} + + +\\. { +} + + +[^/\n\\]+ { } + + +\n { + panic(fmt.Errorf("unterminated regular expression")) +} + + +\" { + collectText = true + textBuilder.Reset() + BEGIN (str); +} + + +"/" { + collectText = true + textBuilder.Reset() + BEGIN (regexp); +} + + +\{(({hexdigit}|[ \-|\?\[\]\(\)\n\r\t]|\/\*[^*]*\*\/)+|\/\/.*)+\} { + // Match hex-digits with whitespace or comments. The latter are stripped + // out by hex_lexer.l + + // NOTE: The above comment may not apply. We plan to not use hex_lexer.l + + // No need to collect like str and regexp start conditions + yylval.s = strings.Trim(YYtext, "{}") + + return _HEX_STRING_; +} + + +[ \t\r\n] /* skip whitespace */ + +. { + + r := int(yytext[0]) + + if r >= 32 && r < 127 { + return r + } + + panic(fmt.Errorf("non-ascii byte '%d'", r)) +} + +%% diff --git a/grammar/parser.go b/grammar/parser.go new file mode 100644 index 0000000..674832e --- /dev/null +++ b/grammar/parser.go @@ -0,0 +1,1488 @@ +// Code generated by goyacc -p xx -o /grammar/parser.go /grammar/grammar.y. DO NOT EDIT. + +//line /grammar/grammar.y:31 +package grammar + +import __yyfmt__ "fmt" + +//line /grammar/grammar.y:31 +import ( + "fmt" + "strings" + + "github.com/VirusTotal/go-yara-parser/data" +) + +var ParsedRuleset data.RuleSet + +type regexPair struct { + text string + mods data.StringModifiers +} + +//line /grammar/grammar.y:130 +type xxSymType struct { + yys int + i64 int64 + s string + ss []string + + rm data.RuleModifiers + m data.Metas + mp data.Meta + mps data.Metas + mod data.StringModifiers + reg regexPair + ys data.String + yss data.Strings + yr data.Rule +} + +const _END_OF_INCLUDED_FILE_ = 57346 +const _DOT_DOT_ = 57347 +const _RULE_ = 57348 +const _PRIVATE_ = 57349 +const _GLOBAL_ = 57350 +const _META_ = 57351 +const _STRINGS_ = 57352 +const _CONDITION_ = 57353 +const _IDENTIFIER_ = 57354 +const _STRING_IDENTIFIER_ = 57355 +const _STRING_COUNT_ = 57356 +const _STRING_OFFSET_ = 57357 +const _STRING_LENGTH_ = 57358 +const _STRING_IDENTIFIER_WITH_WILDCARD_ = 57359 +const _NUMBER_ = 57360 +const _DOUBLE_ = 57361 +const _INTEGER_FUNCTION_ = 57362 +const _TEXT_STRING_ = 57363 +const _HEX_STRING_ = 57364 +const _REGEXP_ = 57365 +const _ASCII_ = 57366 +const _WIDE_ = 57367 +const _XOR_ = 57368 +const _NOCASE_ = 57369 +const _FULLWORD_ = 57370 +const _AT_ = 57371 +const _FILESIZE_ = 57372 +const _ENTRYPOINT_ = 57373 +const _ALL_ = 57374 +const _ANY_ = 57375 +const _IN_ = 57376 +const _OF_ = 57377 +const _FOR_ = 57378 +const _THEM_ = 57379 +const _MATCHES_ = 57380 +const _CONTAINS_ = 57381 +const _IMPORT_ = 57382 +const _TRUE_ = 57383 +const _FALSE_ = 57384 +const _LBRACE_ = 57385 +const _RBRACE_ = 57386 +const _INCLUDE_ = 57387 +const _OR_ = 57388 +const _AND_ = 57389 +const _EQ_ = 57390 +const _NEQ_ = 57391 +const _LT_ = 57392 +const _LE_ = 57393 +const _GT_ = 57394 +const _GE_ = 57395 +const _SHIFT_LEFT_ = 57396 +const _SHIFT_RIGHT_ = 57397 +const _NOT_ = 57398 +const UNARY_MINUS = 57399 + +var xxToknames = [...]string{ + "$end", + "error", + "$unk", + "_END_OF_INCLUDED_FILE_", + "_DOT_DOT_", + "_RULE_", + "_PRIVATE_", + 
"_GLOBAL_", + "_META_", + "_STRINGS_", + "_CONDITION_", + "_IDENTIFIER_", + "_STRING_IDENTIFIER_", + "_STRING_COUNT_", + "_STRING_OFFSET_", + "_STRING_LENGTH_", + "_STRING_IDENTIFIER_WITH_WILDCARD_", + "_NUMBER_", + "_DOUBLE_", + "_INTEGER_FUNCTION_", + "_TEXT_STRING_", + "_HEX_STRING_", + "_REGEXP_", + "_ASCII_", + "_WIDE_", + "_XOR_", + "_NOCASE_", + "_FULLWORD_", + "_AT_", + "_FILESIZE_", + "_ENTRYPOINT_", + "_ALL_", + "_ANY_", + "_IN_", + "_OF_", + "_FOR_", + "_THEM_", + "_MATCHES_", + "_CONTAINS_", + "_IMPORT_", + "_TRUE_", + "_FALSE_", + "_LBRACE_", + "_RBRACE_", + "_INCLUDE_", + "_OR_", + "_AND_", + "'|'", + "'^'", + "'&'", + "_EQ_", + "_NEQ_", + "_LT_", + "_LE_", + "_GT_", + "_GE_", + "_SHIFT_LEFT_", + "_SHIFT_RIGHT_", + "'+'", + "'-'", + "'*'", + "'\\\\'", + "'%'", + "_NOT_", + "'~'", + "UNARY_MINUS", + "':'", + "'='", + "'.'", + "'['", + "']'", + "'('", + "')'", + "','", +} +var xxStatenames = [...]string{} + +const xxEofCode = 1 +const xxErrCode = 2 +const xxInitialStackSize = 16 + +//line /grammar/grammar.y:748 + +//line yacctab:1 +var xxExca = [...]int{ + -1, 1, + 1, -1, + -2, 15, + -1, 42, + 23, 34, + -2, 32, + -1, 52, + 35, 93, + -2, 79, + -1, 107, + 35, 93, + -2, 79, + -1, 158, + 73, 50, + 74, 50, + -2, 53, + -1, 186, + 73, 51, + 74, 51, + -2, 53, +} + +const xxPrivate = 57344 + +const xxLast = 405 + +var xxAct = [...]int{ + + 52, 183, 108, 140, 49, 146, 159, 172, 73, 53, + 64, 65, 66, 171, 61, 62, 60, 63, 149, 74, + 70, 201, 202, 192, 193, 204, 58, 59, 71, 72, + 80, 79, 54, 112, 113, 198, 114, 50, 51, 96, + 94, 95, 148, 48, 80, 79, 189, 141, 97, 98, + 89, 90, 91, 92, 93, 102, 68, 208, 107, 105, + 56, 69, 106, 42, 194, 109, 195, 111, 57, 115, + 116, 206, 110, 79, 38, 181, 40, 147, 89, 90, + 91, 92, 93, 122, 123, 124, 125, 126, 127, 128, + 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, + 139, 28, 121, 26, 145, 17, 91, 92, 93, 55, + 151, 152, 153, 39, 155, 18, 81, 82, 104, 158, + 80, 79, 161, 162, 164, 160, 96, 94, 95, 87, + 88, 83, 85, 84, 86, 97, 98, 89, 90, 91, + 92, 93, 163, 97, 98, 89, 90, 91, 92, 93, + 165, 150, 73, 5, 64, 65, 66, 35, 61, 62, + 60, 63, 44, 74, 101, 43, 142, 30, 117, 74, + 58, 59, 71, 72, 99, 118, 143, 186, 77, 100, + 187, 13, 8, 184, 190, 46, 47, 185, 154, 7, + 197, 78, 41, 36, 4, 199, 31, 37, 23, 144, + 68, 203, 20, 205, 45, 69, 14, 207, 33, 182, + 25, 73, 103, 64, 65, 66, 22, 61, 62, 60, + 63, 166, 74, 96, 94, 95, 9, 11, 12, 58, + 59, 196, 97, 98, 89, 90, 91, 92, 93, 120, + 96, 94, 95, 175, 174, 178, 176, 177, 167, 97, + 98, 89, 90, 91, 92, 93, 119, 191, 200, 68, + 188, 180, 157, 156, 69, 150, 96, 94, 95, 67, + 76, 103, 75, 32, 27, 97, 98, 89, 90, 91, + 92, 93, 96, 94, 95, 15, 1, 6, 10, 170, + 173, 97, 98, 89, 90, 91, 92, 93, 96, 94, + 95, 179, 34, 24, 29, 169, 21, 97, 98, 89, + 90, 91, 92, 93, 81, 82, 19, 16, 2, 3, + 0, 168, 0, 0, 96, 94, 95, 87, 88, 83, + 85, 84, 86, 97, 98, 89, 90, 91, 92, 93, + 0, 0, 0, 0, 96, 94, 95, 0, 0, 0, + 0, 0, 0, 97, 98, 89, 90, 91, 92, 93, + 96, 94, 95, 0, 0, 0, 0, 0, 0, 97, + 98, 89, 90, 91, 92, 93, 94, 95, 0, 0, + 0, 0, 0, 0, 97, 98, 89, 90, 91, 92, + 93, 95, 0, 0, 0, 0, 0, 0, 97, 98, + 89, 90, 91, 92, 93, +} +var xxPact = [...]int{ + + -1000, 149, -1000, -1000, 161, -1000, 220, 160, -1000, 194, + -1000, -1000, -1000, -1000, -1000, 38, 72, 190, 207, 186, + -1000, 200, 36, -1000, -1000, 34, 184, 197, 180, 184, + -1000, 6, 69, 9, 180, -1000, -5, -1000, 144, -1000, + -4, -1000, 156, -1000, -1000, 173, -1000, -1000, 74, -1000, + -1000, -1000, 276, 145, 140, 83, -4, -4, -1000, -1000, + -7, -1000, -1000, -1000, -1000, 2, 
-3, -36, 199, 199, + -1000, -1000, -1000, -1000, -1000, 147, 152, -1000, -1000, -1000, + -1000, 146, 199, 199, 199, 199, 199, 199, 199, 199, + 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, + -25, 164, 312, 199, 5, -1000, -55, 78, 74, 199, + 199, 199, 176, 199, -4, -1000, -1000, -1000, -1000, -4, + -4, -1000, 312, 312, 312, 312, 312, 312, 312, 45, + 45, -1000, -1000, -1000, 341, 86, 327, 19, 19, 312, + -1000, 199, -1000, 90, 5, 192, -1000, -1000, -1000, -1000, + -1000, 175, 250, 234, -1000, 218, -60, -67, -1000, 219, + 219, -1000, 26, 296, -1000, 8, 170, -1000, -1000, -1000, + -1000, -1000, -4, -1000, -1000, -1000, -1000, -1000, -1000, 199, + -26, -1000, -50, -1000, -1000, -1000, -1000, -9, -1, 199, + -1000, -37, -1000, 170, -1000, -1000, -52, 296, -4, -1000, + -47, -1000, 199, -2, -4, 312, -1000, -16, -1000, +} +var xxPgo = [...]int{ + + 0, 319, 318, 317, 316, 306, 304, 167, 303, 302, + 157, 290, 6, 288, 287, 286, 285, 274, 273, 2, + 272, 270, 269, 0, 263, 262, 4, 20, 3, 109, + 261, 260, 258, 5, 257, 256, 239, 231, 221, 209, + 1, +} +var xxR1 = [...]int{ + + 0, 15, 15, 15, 15, 15, 1, 16, 17, 2, + 5, 5, 8, 8, 18, 14, 14, 13, 13, 3, + 3, 4, 4, 6, 6, 7, 7, 7, 7, 7, + 9, 9, 20, 10, 21, 10, 10, 12, 12, 11, + 11, 11, 11, 11, 22, 22, 22, 22, 24, 24, + 25, 25, 27, 19, 26, 26, 26, 26, 26, 26, + 26, 26, 30, 32, 26, 34, 26, 26, 26, 35, + 26, 36, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 31, 31, 28, 37, 37, 38, 33, 33, 39, + 39, 40, 40, 29, 29, 29, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, +} +var xxR2 = [...]int{ + + 0, 0, 2, 2, 3, 2, 2, 0, 0, 11, + 0, 3, 0, 3, 3, 0, 2, 1, 1, 0, + 2, 1, 2, 1, 2, 3, 3, 4, 3, 3, + 1, 2, 0, 5, 0, 5, 3, 0, 2, 1, + 1, 1, 1, 1, 1, 3, 4, 4, 0, 1, + 1, 3, 1, 1, 1, 1, 3, 3, 1, 3, + 3, 3, 0, 0, 11, 0, 9, 3, 2, 0, + 4, 0, 4, 3, 3, 3, 3, 3, 3, 1, + 3, 3, 1, 5, 1, 3, 0, 4, 1, 1, + 3, 1, 1, 1, 1, 1, 3, 1, 1, 4, + 1, 1, 1, 1, 4, 1, 4, 1, 1, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, + 3, 1, +} +var xxChk = [...]int{ + + -1000, -15, -2, -1, 45, 4, -14, 40, 21, 6, + -13, 7, 8, 21, 12, -16, -3, 67, 43, -4, + 12, -5, 9, 12, -8, 10, 67, -17, 67, -6, + -7, 12, -18, 11, -9, -10, 13, -7, 68, 44, + 67, -10, 68, 21, 18, 60, 41, 42, -19, -26, + 41, 42, -23, 13, 36, -29, 64, 72, 30, 31, + 20, 18, 19, 21, 14, 15, 16, -22, 60, 65, + -27, 32, 33, 12, 23, -20, -21, 22, 18, 47, + 46, 38, 39, 53, 55, 54, 56, 51, 52, 59, + 60, 61, 62, 63, 49, 50, 48, 57, 58, 29, + 34, -29, -23, 72, 35, -19, -26, -23, -19, 72, + 70, 70, 69, 70, 72, -23, -23, 21, 23, -35, + -36, -27, -23, -23, -23, -23, -23, -23, -23, -23, + -23, -23, -23, -23, -23, -23, -23, -23, -23, -23, + -28, 72, 2, 12, 35, -23, -33, 72, 37, 73, + 73, -23, -23, -23, 12, -23, -24, -25, -26, -12, + -12, -19, -19, -23, 34, -33, -38, 73, 71, 71, + 71, 73, 74, -11, 25, 24, 27, 28, 26, 5, + -30, 67, -39, -40, 13, 17, -26, -23, -31, 72, + -28, -34, 73, 74, 73, 67, -37, -23, 72, -40, + -32, 73, 74, -19, 72, -23, 73, -19, 73, +} +var xxDef = [...]int{ + + 1, -2, 2, 3, 0, 5, 0, 0, 4, 0, + 16, 17, 18, 6, 7, 19, 0, 0, 10, 20, + 21, 12, 0, 22, 8, 0, 0, 0, 0, 11, + 23, 0, 0, 0, 13, 30, 0, 24, 0, 9, + 0, 31, -2, 25, 26, 0, 28, 29, 14, 53, + 54, 55, -2, 58, 0, 0, 0, 0, 97, 98, + 0, 100, 101, 102, 103, 105, 107, 108, 0, 0, + 121, 94, 95, 44, 52, 0, 0, 36, 27, 69, + 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 93, 0, 0, 68, 53, -2, 0, 0, + 0, 0, 0, 0, 48, 109, 118, 37, 37, 0, + 0, 56, 57, 73, 74, 75, 76, 77, 78, 110, + 111, 112, 113, 114, 115, 116, 117, 119, 120, 59, + 60, 0, 61, 
0, 0, 0, 67, 86, 88, 80, + 96, 0, 0, 0, 45, 0, 0, 49, -2, 33, + 35, 70, 72, 0, 62, 0, 0, 99, 104, 106, + 46, 47, 0, 38, 39, 40, 41, 42, 43, 0, + 0, 65, 0, 89, 91, 92, -2, 0, 0, 0, + 82, 0, 87, 0, 83, 63, 0, 84, 0, 90, + 0, 81, 0, 0, 0, 85, 66, 0, 64, +} +var xxTok1 = [...]int{ + + 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 63, 50, 3, + 72, 73, 61, 59, 74, 60, 69, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 67, 3, + 3, 68, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 70, 62, 71, 49, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 48, 3, 65, +} +var xxTok2 = [...]int{ + + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 51, 52, 53, 54, + 55, 56, 57, 58, 64, 66, +} +var xxTok3 = [...]int{ + 0, +} + +var xxErrorMessages = [...]struct { + state int + token int + msg string +}{} + +//line yaccpar:1 + +/* parser for yacc output */ + +var ( + xxDebug = 0 + xxErrorVerbose = false +) + +type xxLexer interface { + Lex(lval *xxSymType) int + Error(s string) +} + +type xxParser interface { + Parse(xxLexer) int + Lookahead() int +} + +type xxParserImpl struct { + lval xxSymType + stack [xxInitialStackSize]xxSymType + char int +} + +func (p *xxParserImpl) Lookahead() int { + return p.char +} + +func xxNewParser() xxParser { + return &xxParserImpl{} +} + +const xxFlag = -1000 + +func xxTokname(c int) string { + if c >= 1 && c-1 < len(xxToknames) { + if xxToknames[c-1] != "" { + return xxToknames[c-1] + } + } + return __yyfmt__.Sprintf("tok-%v", c) +} + +func xxStatname(s int) string { + if s >= 0 && s < len(xxStatenames) { + if xxStatenames[s] != "" { + return xxStatenames[s] + } + } + return __yyfmt__.Sprintf("state-%v", s) +} + +func xxErrorMessage(state, lookAhead int) string { + const TOKSTART = 4 + + if !xxErrorVerbose { + return "syntax error" + } + + for _, e := range xxErrorMessages { + if e.state == state && e.token == lookAhead { + return "syntax error: " + e.msg + } + } + + res := "syntax error: unexpected " + xxTokname(lookAhead) + + // To match Bison, suggest at most four expected tokens. + expected := make([]int, 0, 4) + + // Look for shiftable tokens. + base := xxPact[state] + for tok := TOKSTART; tok-1 < len(xxToknames); tok++ { + if n := base + tok; n >= 0 && n < xxLast && xxChk[xxAct[n]] == tok { + if len(expected) == cap(expected) { + return res + } + expected = append(expected, tok) + } + } + + if xxDef[state] == -2 { + i := 0 + for xxExca[i] != -1 || xxExca[i+1] != state { + i += 2 + } + + // Look for tokens that we accept or reduce. + for i += 2; xxExca[i] >= 0; i += 2 { + tok := xxExca[i] + if tok < TOKSTART || xxExca[i+1] == 0 { + continue + } + if len(expected) == cap(expected) { + return res + } + expected = append(expected, tok) + } + + // If the default action is to accept or reduce, give up. 
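+		// With a default reduce or accept the state can act on any
+		// lookahead token, so an "expecting ..." list would be
+		// misleading here.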
+ if xxExca[i+1] != 0 { + return res + } + } + + for i, tok := range expected { + if i == 0 { + res += ", expecting " + } else { + res += " or " + } + res += xxTokname(tok) + } + return res +} + +func xxlex1(lex xxLexer, lval *xxSymType) (char, token int) { + token = 0 + char = lex.Lex(lval) + if char <= 0 { + token = xxTok1[0] + goto out + } + if char < len(xxTok1) { + token = xxTok1[char] + goto out + } + if char >= xxPrivate { + if char < xxPrivate+len(xxTok2) { + token = xxTok2[char-xxPrivate] + goto out + } + } + for i := 0; i < len(xxTok3); i += 2 { + token = xxTok3[i+0] + if token == char { + token = xxTok3[i+1] + goto out + } + } + +out: + if token == 0 { + token = xxTok2[1] /* unknown char */ + } + if xxDebug >= 3 { + __yyfmt__.Printf("lex %s(%d)\n", xxTokname(token), uint(char)) + } + return char, token +} + +func xxParse(xxlex xxLexer) int { + return xxNewParser().Parse(xxlex) +} + +func (xxrcvr *xxParserImpl) Parse(xxlex xxLexer) int { + var xxn int + var xxVAL xxSymType + var xxDollar []xxSymType + _ = xxDollar // silence set and not used + xxS := xxrcvr.stack[:] + + Nerrs := 0 /* number of errors */ + Errflag := 0 /* error recovery flag */ + xxstate := 0 + xxrcvr.char = -1 + xxtoken := -1 // xxrcvr.char translated into internal numbering + defer func() { + // Make sure we report no lookahead when not parsing. + xxstate = -1 + xxrcvr.char = -1 + xxtoken = -1 + }() + xxp := -1 + goto xxstack + +ret0: + return 0 + +ret1: + return 1 + +xxstack: + /* put a state and value onto the stack */ + if xxDebug >= 4 { + __yyfmt__.Printf("char %v in %v\n", xxTokname(xxtoken), xxStatname(xxstate)) + } + + xxp++ + if xxp >= len(xxS) { + nyys := make([]xxSymType, len(xxS)*2) + copy(nyys, xxS) + xxS = nyys + } + xxS[xxp] = xxVAL + xxS[xxp].yys = xxstate + +xxnewstate: + xxn = xxPact[xxstate] + if xxn <= xxFlag { + goto xxdefault /* simple state */ + } + if xxrcvr.char < 0 { + xxrcvr.char, xxtoken = xxlex1(xxlex, &xxrcvr.lval) + } + xxn += xxtoken + if xxn < 0 || xxn >= xxLast { + goto xxdefault + } + xxn = xxAct[xxn] + if xxChk[xxn] == xxtoken { /* valid shift */ + xxrcvr.char = -1 + xxtoken = -1 + xxVAL = xxrcvr.lval + xxstate = xxn + if Errflag > 0 { + Errflag-- + } + goto xxstack + } + +xxdefault: + /* default state action */ + xxn = xxDef[xxstate] + if xxn == -2 { + if xxrcvr.char < 0 { + xxrcvr.char, xxtoken = xxlex1(xxlex, &xxrcvr.lval) + } + + /* look through exception table */ + xi := 0 + for { + if xxExca[xi+0] == -1 && xxExca[xi+1] == xxstate { + break + } + xi += 2 + } + for xi += 2; ; xi += 2 { + xxn = xxExca[xi+0] + if xxn < 0 || xxn == xxtoken { + break + } + } + xxn = xxExca[xi+1] + if xxn < 0 { + goto ret0 + } + } + if xxn == 0 { + /* error ... attempt to resume parsing */ + switch Errflag { + case 0: /* brand new error */ + xxlex.Error(xxErrorMessage(xxstate, xxtoken)) + Nerrs++ + if xxDebug >= 1 { + __yyfmt__.Printf("%s", xxStatname(xxstate)) + __yyfmt__.Printf(" saw %s\n", xxTokname(xxtoken)) + } + fallthrough + + case 1, 2: /* incompletely recovered error ... try again */ + Errflag = 3 + + /* find a state where "error" is a legal shift action */ + for xxp >= 0 { + xxn = xxPact[xxS[xxp].yys] + xxErrCode + if xxn >= 0 && xxn < xxLast { + xxstate = xxAct[xxn] /* simulate a shift of "error" */ + if xxChk[xxstate] == xxErrCode { + goto xxstack + } + } + + /* the current p has no shift on "error", pop stack */ + if xxDebug >= 2 { + __yyfmt__.Printf("error recovery pops state %d\n", xxS[xxp].yys) + } + xxp-- + } + /* there is no state on the stack with an error shift ... 
abort */ + goto ret1 + + case 3: /* no shift yet; clobber input char */ + if xxDebug >= 2 { + __yyfmt__.Printf("error recovery discards %s\n", xxTokname(xxtoken)) + } + if xxtoken == xxEofCode { + goto ret1 + } + xxrcvr.char = -1 + xxtoken = -1 + goto xxnewstate /* try again in the same state */ + } + } + + /* reduction by production xxn */ + if xxDebug >= 2 { + __yyfmt__.Printf("reduce %v in:\n\t%v\n", xxn, xxStatname(xxstate)) + } + + xxnt := xxn + xxpt := xxp + _ = xxpt // guard against "declared and not used" + + xxp -= xxR2[xxn] + // xxp is now the index of $0. Perform the default action. Iff the + // reduced production is ε, $1 is possibly out of range. + if xxp+1 >= len(xxS) { + nyys := make([]xxSymType, len(xxS)*2) + copy(nyys, xxS) + xxS = nyys + } + xxVAL = xxS[xxp+1] + + /* consult goto table to find next state */ + xxn = xxR1[xxn] + xxg := xxPgo[xxn] + xxj := xxg + xxS[xxp].yys + 1 + + if xxj >= xxLast { + xxstate = xxAct[xxg] + } else { + xxstate = xxAct[xxj] + if xxChk[xxstate] != -xxn { + xxstate = xxAct[xxg] + } + } + // dummy call; replaced with literal code + switch xxnt { + + case 2: + xxDollar = xxS[xxpt-2 : xxpt+1] +//line /grammar/grammar.y:151 + { + ParsedRuleset.Rules = append(ParsedRuleset.Rules, xxDollar[2].yr) + } + case 3: + xxDollar = xxS[xxpt-2 : xxpt+1] +//line /grammar/grammar.y:154 + { + ParsedRuleset.Imports = append(ParsedRuleset.Imports, xxDollar[2].s) + } + case 4: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:157 + { + ParsedRuleset.Includes = append(ParsedRuleset.Includes, xxDollar[3].s) + } + case 5: + xxDollar = xxS[xxpt-2 : xxpt+1] +//line /grammar/grammar.y:160 + { + } + case 6: + xxDollar = xxS[xxpt-2 : xxpt+1] +//line /grammar/grammar.y:166 + { + xxVAL.s = xxDollar[2].s + } + case 7: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:174 + { + xxVAL.yr.Modifiers = xxDollar[1].rm + xxVAL.yr.Identifier = xxDollar[3].s + + // Forbid duplicate rules + for _, r := range ParsedRuleset.Rules { + if xxDollar[3].s == r.Identifier { + err := fmt.Errorf(`Duplicate rule "%s"`, xxDollar[3].s) + panic(err) + } + } + } + case 8: + xxDollar = xxS[xxpt-8 : xxpt+1] +//line /grammar/grammar.y:187 + { + // $4 is the rule created in above action + xxDollar[4].yr.Tags = xxDollar[5].ss + + // Forbid duplicate tags + idx := make(map[string]struct{}) + for _, t := range xxDollar[5].ss { + if _, had := idx[t]; had { + msg := fmt.Sprintf(`grammar: Rule "%s" has duplicate tag "%s"`, + xxDollar[4].yr.Identifier, + t) + panic(msg) + } + idx[t] = struct{}{} + } + + xxDollar[4].yr.Meta = xxDollar[7].m + + xxDollar[4].yr.Strings = xxDollar[8].yss + + // Forbid duplicate string IDs, except `$` (anonymous) + idx = make(map[string]struct{}) + for _, s := range xxDollar[8].yss { + if s.ID == "$" { + continue + } + if _, had := idx[s.ID]; had { + msg := fmt.Sprintf( + `grammar: Rule "%s" has duplicated string "%s"`, + xxDollar[4].yr.Identifier, + s.ID) + panic(msg) + } + idx[s.ID] = struct{}{} + } + } + case 9: + xxDollar = xxS[xxpt-11 : xxpt+1] +//line /grammar/grammar.y:224 + { + c := conditionBuilder.String() + c = strings.TrimLeft(c, ":\n\r\t ") + c = strings.TrimRight(c, "}\n\r\t ") + xxDollar[4].yr.Condition = c + xxVAL.yr = xxDollar[4].yr + } + case 10: + xxDollar = xxS[xxpt-0 : xxpt+1] +//line /grammar/grammar.y:236 + { + + } + case 11: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:240 + { + xxVAL.m = make(data.Metas, 0, len(xxDollar[3].mps)) + for _, mpair := range xxDollar[3].mps { + // YARA is ok with duplicate keys; we follow suit 
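+			// (contrast with rule names, tags, and string IDs, which
+			// are checked for duplicates in the rule actions above)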
+ xxVAL.m = append(xxVAL.m, mpair) + } + } + case 12: + xxDollar = xxS[xxpt-0 : xxpt+1] +//line /grammar/grammar.y:252 + { + xxVAL.yss = data.Strings{} + } + case 13: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:256 + { + xxVAL.yss = xxDollar[3].yss + } + case 15: + xxDollar = xxS[xxpt-0 : xxpt+1] +//line /grammar/grammar.y:268 + { + xxVAL.rm = data.RuleModifiers{} + } + case 16: + xxDollar = xxS[xxpt-2 : xxpt+1] +//line /grammar/grammar.y:269 + { + xxVAL.rm.Private = xxVAL.rm.Private || xxDollar[2].rm.Private + xxVAL.rm.Global = xxVAL.rm.Global || xxDollar[2].rm.Global + } + case 17: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:277 + { + xxVAL.rm.Private = true + } + case 18: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:278 + { + xxVAL.rm.Global = true + } + case 19: + xxDollar = xxS[xxpt-0 : xxpt+1] +//line /grammar/grammar.y:284 + { + xxVAL.ss = []string{} + } + case 20: + xxDollar = xxS[xxpt-2 : xxpt+1] +//line /grammar/grammar.y:288 + { + xxVAL.ss = xxDollar[2].ss + } + case 21: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:296 + { + xxVAL.ss = []string{xxDollar[1].s} + } + case 22: + xxDollar = xxS[xxpt-2 : xxpt+1] +//line /grammar/grammar.y:300 + { + xxVAL.ss = append(xxDollar[1].ss, xxDollar[2].s) + } + case 23: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:308 + { + xxVAL.mps = data.Metas{xxDollar[1].mp} + } + case 24: + xxDollar = xxS[xxpt-2 : xxpt+1] +//line /grammar/grammar.y:309 + { + xxVAL.mps = append(xxVAL.mps, xxDollar[2].mp) + } + case 25: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:315 + { + xxVAL.mp = data.Meta{xxDollar[1].s, xxDollar[3].s} + } + case 26: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:319 + { + xxVAL.mp = data.Meta{xxDollar[1].s, xxDollar[3].i64} + } + case 27: + xxDollar = xxS[xxpt-4 : xxpt+1] +//line /grammar/grammar.y:323 + { + xxVAL.mp = data.Meta{xxDollar[1].s, -xxDollar[4].i64} + } + case 28: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:327 + { + xxVAL.mp = data.Meta{xxDollar[1].s, true} + } + case 29: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:331 + { + xxVAL.mp = data.Meta{xxDollar[1].s, false} + } + case 30: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:338 + { + xxVAL.yss = data.Strings{xxDollar[1].ys} + } + case 31: + xxDollar = xxS[xxpt-2 : xxpt+1] +//line /grammar/grammar.y:339 + { + xxVAL.yss = append(xxDollar[1].yss, xxDollar[2].ys) + } + case 32: + xxDollar = xxS[xxpt-2 : xxpt+1] +//line /grammar/grammar.y:345 + { + xxVAL.ys.Type = data.TypeString + xxVAL.ys.ID = xxDollar[1].s + } + case 33: + xxDollar = xxS[xxpt-5 : xxpt+1] +//line /grammar/grammar.y:350 + { + xxDollar[3].ys.Text = xxDollar[4].s + xxDollar[3].ys.Modifiers = xxDollar[5].mod + + xxVAL.ys = xxDollar[3].ys + } + case 34: + xxDollar = xxS[xxpt-2 : xxpt+1] +//line /grammar/grammar.y:357 + { + xxVAL.ys.Type = data.TypeRegex + xxVAL.ys.ID = xxDollar[1].s + } + case 35: + xxDollar = xxS[xxpt-5 : xxpt+1] +//line /grammar/grammar.y:362 + { + xxDollar[3].ys.Text = xxDollar[4].reg.text + + xxDollar[5].mod.I = xxDollar[4].reg.mods.I + xxDollar[5].mod.S = xxDollar[4].reg.mods.S + + xxDollar[3].ys.Modifiers = xxDollar[5].mod + + xxVAL.ys = xxDollar[3].ys + } + case 36: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:373 + { + xxVAL.ys.Type = data.TypeHexString + xxVAL.ys.ID = xxDollar[1].s + xxVAL.ys.Text = xxDollar[3].s + } + case 37: + xxDollar = xxS[xxpt-0 : xxpt+1] +//line /grammar/grammar.y:382 + { + xxVAL.mod = 
data.StringModifiers{} + } + case 38: + xxDollar = xxS[xxpt-2 : xxpt+1] +//line /grammar/grammar.y:385 + { + xxVAL.mod = data.StringModifiers{ + Wide: xxDollar[1].mod.Wide || xxDollar[2].mod.Wide, + ASCII: xxDollar[1].mod.ASCII || xxDollar[2].mod.ASCII, + Nocase: xxDollar[1].mod.Nocase || xxDollar[2].mod.Nocase, + Fullword: xxDollar[1].mod.Fullword || xxDollar[2].mod.Fullword, + Xor: xxDollar[1].mod.Xor || xxDollar[2].mod.Xor, + } + } + case 39: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:398 + { + xxVAL.mod.Wide = true + } + case 40: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:399 + { + xxVAL.mod.ASCII = true + } + case 41: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:400 + { + xxVAL.mod.Nocase = true + } + case 42: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:401 + { + xxVAL.mod.Fullword = true + } + case 43: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:402 + { + xxVAL.mod.Xor = true + } + case 44: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:408 + { + + } + case 45: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:412 + { + + } + case 46: + xxDollar = xxS[xxpt-4 : xxpt+1] +//line /grammar/grammar.y:416 + { + + } + case 47: + xxDollar = xxS[xxpt-4 : xxpt+1] +//line /grammar/grammar.y:421 + { + + } + case 48: + xxDollar = xxS[xxpt-0 : xxpt+1] +//line /grammar/grammar.y:428 + { + } + case 49: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:429 + { + } + case 50: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:434 + { + + } + case 51: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:438 + { + + } + case 52: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:446 + { + + } + case 53: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:454 + { + + } + case 54: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:461 + { + + } + case 55: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:465 + { + + } + case 56: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:469 + { + + } + case 57: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:473 + { + + } + case 58: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:477 + { + + } + case 59: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:481 + { + + } + case 60: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:485 + { + + } + case 61: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:489 + { + + } + case 62: + xxDollar = xxS[xxpt-4 : xxpt+1] +//line /grammar/grammar.y:493 + { + + } + case 63: + xxDollar = xxS[xxpt-7 : xxpt+1] +//line /grammar/grammar.y:497 + { + + } + case 64: + xxDollar = xxS[xxpt-11 : xxpt+1] +//line /grammar/grammar.y:501 + { + + } + case 65: + xxDollar = xxS[xxpt-5 : xxpt+1] +//line /grammar/grammar.y:505 + { + + } + case 66: + xxDollar = xxS[xxpt-9 : xxpt+1] +//line /grammar/grammar.y:509 + { + + } + case 67: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:513 + { + + } + case 68: + xxDollar = xxS[xxpt-2 : xxpt+1] +//line /grammar/grammar.y:517 + { + + } + case 69: + xxDollar = xxS[xxpt-2 : xxpt+1] +//line /grammar/grammar.y:521 + { + + } + case 70: + xxDollar = xxS[xxpt-4 : xxpt+1] +//line /grammar/grammar.y:525 + { + + } + case 71: + xxDollar = xxS[xxpt-2 : xxpt+1] +//line /grammar/grammar.y:529 + { + + } + case 72: + xxDollar = xxS[xxpt-4 : xxpt+1] +//line /grammar/grammar.y:533 + { + + } + case 73: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:537 + 
{ + + } + case 74: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:541 + { + + } + case 75: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:545 + { + + } + case 76: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:549 + { + + } + case 77: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:553 + { + + } + case 78: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:557 + { + + } + case 79: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:561 + { + + } + case 80: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:565 + { + + } + case 81: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:572 + { + } + case 82: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:573 + { + } + case 83: + xxDollar = xxS[xxpt-5 : xxpt+1] +//line /grammar/grammar.y:579 + { + + } + case 84: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:587 + { + + } + case 85: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:591 + { + + } + case 86: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:599 + { + + } + case 88: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:604 + { + + } + case 91: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:618 + { + + } + case 92: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:622 + { + + } + case 94: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:631 + { + + } + case 95: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:635 + { + + } + case 96: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:643 + { + + } + case 97: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:647 + { + + } + case 98: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:651 + { + + } + case 99: + xxDollar = xxS[xxpt-4 : xxpt+1] +//line /grammar/grammar.y:655 + { + + } + case 100: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:659 + { + + } + case 101: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:663 + { + + } + case 102: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:667 + { + + } + case 103: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:671 + { + + } + case 104: + xxDollar = xxS[xxpt-4 : xxpt+1] +//line /grammar/grammar.y:675 + { + + } + case 105: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:679 + { + + } + case 106: + xxDollar = xxS[xxpt-4 : xxpt+1] +//line /grammar/grammar.y:683 + { + + } + case 107: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:687 + { + + } + case 108: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:691 + { + + } + case 109: + xxDollar = xxS[xxpt-2 : xxpt+1] +//line /grammar/grammar.y:695 + { + + } + case 110: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:699 + { + + } + case 111: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:703 + { + + } + case 112: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:707 + { + + } + case 113: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:711 + { + + } + case 114: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:715 + { + + } + case 115: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:719 + { + + } + case 116: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:723 + { + + } + case 117: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:727 + { + + } + case 118: + xxDollar = xxS[xxpt-2 : xxpt+1] +//line /grammar/grammar.y:731 + { + + } + 
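+	// The condition-section productions (cases 44 onward) have empty
+	// bodies on purpose: conditions are not turned into an AST here.
+	// The lexer accumulates the raw condition text in conditionBuilder,
+	// and case 9 trims it and attaches it to the rule.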
case 119: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:735 + { + + } + case 120: + xxDollar = xxS[xxpt-3 : xxpt+1] +//line /grammar/grammar.y:739 + { + + } + case 121: + xxDollar = xxS[xxpt-1 : xxpt+1] +//line /grammar/grammar.y:743 + { + + } + } + goto xxstack /* stack new state and value */ +} diff --git a/grammar/re_grammar.y b/grammar/re_grammar.y new file mode 100644 index 0000000..77c608a --- /dev/null +++ b/grammar/re_grammar.y @@ -0,0 +1,434 @@ +/* +Copyright (c) 2013. The YARA Authors. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +%{ + +#include +#include +#include +#include +#include +#include +#include + + +#define YYERROR_VERBOSE + +#define YYMALLOC yr_malloc +#define YYFREE yr_free + +#define mark_as_not_fast_regexp() \ + ((RE_AST*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_FAST_REGEXP + +#define fail_if_too_many_ast_levels(cleanup_code) \ + if (((RE_AST*) yyget_extra(yyscanner))->levels++ > RE_MAX_AST_LEVELS) \ + { \ + { cleanup_code } \ + yyerror(yyscanner, lex_env, "regexp too long"); \ + YYABORT; \ + } + +#define fail_if(x, error) \ + if (x) \ + { \ + lex_env->last_error_code = error; \ + YYABORT; \ + } \ + +#define destroy_node_if(x, node) \ + if (x) \ + { \ + yr_re_node_destroy(node); \ + } \ + +%} + +%name-prefix "re_yy" +%pure-parser + +%parse-param {void *yyscanner} +%parse-param {RE_LEX_ENVIRONMENT *lex_env} + +%lex-param {yyscan_t yyscanner} +%lex-param {RE_LEX_ENVIRONMENT *lex_env} + +%union { + int integer; + uint32_t range; + RE_NODE* re_node; + RE_CLASS* re_class; +} + + +%token _CHAR_ _ANY_ +%token _RANGE_ +%token _CLASS_ + +%token _WORD_CHAR_ +%token _NON_WORD_CHAR_ +%token _SPACE_ +%token _NON_SPACE_ +%token _DIGIT_ +%token _NON_DIGIT_ +%token _WORD_BOUNDARY_ +%token _NON_WORD_BOUNDARY_ + +%type alternative concatenation repeat single + +%destructor { yr_free($$); $$ = NULL; } _CLASS_ +%destructor { yr_re_node_destroy($$); $$ = NULL; } alternative +%destructor { yr_re_node_destroy($$); $$ = NULL; } concatenation +%destructor { yr_re_node_destroy($$); $$ = NULL; } repeat +%destructor { yr_re_node_destroy($$); $$ = NULL; } single + +%% + +re : alternative + { + RE_AST* re_ast = yyget_extra(yyscanner); + re_ast->root_node = $1; + } + | error + ; + +alternative + : concatenation + { + $$ = $1; + } + | alternative '|' concatenation + { + mark_as_not_fast_regexp(); + + fail_if_too_many_ast_levels({ + yr_re_node_destroy($1); + yr_re_node_destroy($3); + }); + + $$ = yr_re_node_create(RE_NODE_ALT, $1, $3); + + destroy_node_if($$ == NULL, $1); + destroy_node_if($$ == NULL, $3); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + | alternative '|' + { + RE_NODE* node; + + mark_as_not_fast_regexp(); + + fail_if_too_many_ast_levels({ + yr_re_node_destroy($1); + }); + + node = yr_re_node_create(RE_NODE_EMPTY, NULL, NULL); + + destroy_node_if($$ == NULL, $1); + fail_if(node == NULL, ERROR_INSUFFICIENT_MEMORY); + + $$ = yr_re_node_create(RE_NODE_ALT, $1, node); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + ; + +concatenation + : repeat + { + $$ = $1; + } + | concatenation repeat + { + fail_if_too_many_ast_levels({ + yr_re_node_destroy($1); + yr_re_node_destroy($2); + }); + + $$ = yr_re_node_create(RE_NODE_CONCAT, $1, $2); + + destroy_node_if($$ == NULL, $1); + destroy_node_if($$ == NULL, $2); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + ; + +repeat + : single '*' + { + RE_AST* re_ast; + + mark_as_not_fast_regexp(); + + re_ast = yyget_extra(yyscanner); + re_ast->flags |= RE_FLAGS_GREEDY; + + $$ = yr_re_node_create(RE_NODE_STAR, $1, NULL); + + destroy_node_if($$ == NULL, $1); + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + | single '*' '?' 
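+    /* A trailing '?' makes the quantifier lazy: the same STAR node is
+       built, but greedy is cleared below. */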
+ { + RE_AST* re_ast; + + mark_as_not_fast_regexp(); + + re_ast = yyget_extra(yyscanner); + re_ast->flags |= RE_FLAGS_UNGREEDY; + + $$ = yr_re_node_create(RE_NODE_STAR, $1, NULL); + + destroy_node_if($$ == NULL, $1); + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + + $$->greedy = FALSE; + } + | single '+' + { + RE_AST* re_ast; + + mark_as_not_fast_regexp(); + + re_ast = yyget_extra(yyscanner); + re_ast->flags |= RE_FLAGS_GREEDY; + + $$ = yr_re_node_create(RE_NODE_PLUS, $1, NULL); + + destroy_node_if($$ == NULL, $1); + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + | single '+' '?' + { + RE_AST* re_ast; + + mark_as_not_fast_regexp(); + + re_ast = yyget_extra(yyscanner); + re_ast->flags |= RE_FLAGS_UNGREEDY; + + $$ = yr_re_node_create(RE_NODE_PLUS, $1, NULL); + + destroy_node_if($$ == NULL, $1); + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + + $$->greedy = FALSE; + } + | single '?' + { + RE_AST* re_ast = yyget_extra(yyscanner); + re_ast->flags |= RE_FLAGS_GREEDY; + + if ($1->type == RE_NODE_ANY) + { + $$ = yr_re_node_create(RE_NODE_RANGE_ANY, NULL, NULL); + destroy_node_if(TRUE, $1); + } + else + { + mark_as_not_fast_regexp(); + $$ = yr_re_node_create(RE_NODE_RANGE, $1, NULL); + destroy_node_if($$ == NULL, $1); + } + + destroy_node_if($$ == NULL, $1); + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + + $$->start = 0; + $$->end = 1; + } + | single '?' '?' + { + RE_AST* re_ast = yyget_extra(yyscanner); + re_ast->flags |= RE_FLAGS_UNGREEDY; + + if ($1->type == RE_NODE_ANY) + { + $$ = yr_re_node_create(RE_NODE_RANGE_ANY, NULL, NULL); + destroy_node_if(TRUE, $1); + } + else + { + mark_as_not_fast_regexp(); + $$ = yr_re_node_create(RE_NODE_RANGE, $1, NULL); + destroy_node_if($$ == NULL, $1); + } + + destroy_node_if($$ == NULL, $1); + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + + $$->start = 0; + $$->end = 1; + $$->greedy = FALSE; + } + | single _RANGE_ + { + RE_AST* re_ast = yyget_extra(yyscanner); + re_ast->flags |= RE_FLAGS_GREEDY; + + if ($1->type == RE_NODE_ANY) + { + $$ = yr_re_node_create(RE_NODE_RANGE_ANY, NULL, NULL); + destroy_node_if(TRUE, $1); + } + else + { + mark_as_not_fast_regexp(); + $$ = yr_re_node_create(RE_NODE_RANGE, $1, NULL); + destroy_node_if($$ == NULL, $1); + } + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + + $$->start = $2 & 0xFFFF;; + $$->end = $2 >> 16;; + } + | single _RANGE_ '?' + { + RE_AST* re_ast = yyget_extra(yyscanner); + re_ast->flags |= RE_FLAGS_UNGREEDY; + + if ($1->type == RE_NODE_ANY) + { + $$ = yr_re_node_create(RE_NODE_RANGE_ANY, NULL, NULL); + destroy_node_if(TRUE, $1); + } + else + { + mark_as_not_fast_regexp(); + $$ = yr_re_node_create(RE_NODE_RANGE, $1, NULL); + destroy_node_if($$ == NULL, $1); + } + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + + $$->start = $2 & 0xFFFF;; + $$->end = $2 >> 16;; + $$->greedy = FALSE; + } + | single + { + $$ = $1; + } + | _WORD_BOUNDARY_ + { + $$ = yr_re_node_create(RE_NODE_WORD_BOUNDARY, NULL, NULL); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + | _NON_WORD_BOUNDARY_ + { + $$ = yr_re_node_create(RE_NODE_NON_WORD_BOUNDARY, NULL, NULL); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + | '^' + { + $$ = yr_re_node_create(RE_NODE_ANCHOR_START, NULL, NULL); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + | '$' + { + $$ = yr_re_node_create(RE_NODE_ANCHOR_END, NULL, NULL); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + ; + +single + : '(' alternative ')' + { + fail_if_too_many_ast_levels({ + yr_re_node_destroy($2); + }); + + $$ = $2; + } + | '.' 
+ { + $$ = yr_re_node_create(RE_NODE_ANY, NULL, NULL); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + | _CHAR_ + { + $$ = yr_re_node_create(RE_NODE_LITERAL, NULL, NULL); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + + $$->value = $1; + } + | _WORD_CHAR_ + { + $$ = yr_re_node_create(RE_NODE_WORD_CHAR, NULL, NULL); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + | _NON_WORD_CHAR_ + { + $$ = yr_re_node_create(RE_NODE_NON_WORD_CHAR, NULL, NULL); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + | _SPACE_ + { + $$ = yr_re_node_create(RE_NODE_SPACE, NULL, NULL); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + | _NON_SPACE_ + { + $$ = yr_re_node_create(RE_NODE_NON_SPACE, NULL, NULL); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + | _DIGIT_ + { + $$ = yr_re_node_create(RE_NODE_DIGIT, NULL, NULL); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + | _NON_DIGIT_ + { + $$ = yr_re_node_create(RE_NODE_NON_DIGIT, NULL, NULL); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + } + | _CLASS_ + { + $$ = yr_re_node_create(RE_NODE_CLASS, NULL, NULL); + + fail_if($$ == NULL, ERROR_INSUFFICIENT_MEMORY); + + $$->re_class = $1; + } + ; +%% diff --git a/grammar/re_lexer.l b/grammar/re_lexer.l new file mode 100644 index 0000000..9ba7d01 --- /dev/null +++ b/grammar/re_lexer.l @@ -0,0 +1,270 @@ +/* +Copyright (c) 2013. The YARA Authors. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* Lexical analyzer for regular expressions */ + +%{ + +%} + +%option reentrant bison-bridge +%option noyywrap +%option nounistd +%option nounput +%option never-interactive +%option yylineno +%option prefix="re_yy" + +%option outfile="lex.yy.go" + +%option verbose +%option warn + +%x char_class + +digit [0-9] +hex_digit [0-9a-fA-F] + +%% + +\{{digit}*,{digit}*\} { + return _RANGE_; +} + + +\{{digit}+\} { + return _RANGE_; +} + + +\[\^ { + + // Start of a negated character class. Example: [^abcd] + + BEGIN(char_class); +} + +\[\^\] { + + // Start of character negated class containing a ]. 
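+  // A ] is treated as a literal class member only when it appears
+  // first, immediately after the [^, so it is consumed here.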
+ // Example: [^]abc] this must be interpreted as a class + // not matching ], a, b, nor c + + BEGIN(char_class); +} + + +\[\] { + + // Start of character class containing a ]. + // Example: []abc] this must be interpreted as a class + // matching ], a, b, or c. + + BEGIN(char_class); +} + + +\[ { + + // Start of character class. Example: [abcd] + + BEGIN(char_class); +} + +[^\\\[\(\)\|\$\.\^\+\*\?] { + + // Any non-special character is passed as a CHAR token to the scanner. + + return _CHAR_; +} + + +\\w { + return _WORD_CHAR_; +} + + +\\W { + return _NON_WORD_CHAR_; +} + + +\\s { + return _SPACE_; +} + + +\\S { + return _NON_SPACE_; +} + + +\\d { + return _DIGIT_; +} + + +\\D { + return _NON_DIGIT_; +} + + +\\b { + return _WORD_BOUNDARY_; +} + +\\B { + return _NON_WORD_BOUNDARY_; +} + + +\\{digit}+ { + + /* + yyerror(yyscanner, lex_env, "backreferences are not allowed"); + yyterminate(); + */ +} + + +\\ { + /* + uint8_t c; + + if (read_escaped_char(yyscanner, &c)) + { + yylval->integer = c; + return _CHAR_; + } + else + { + yyerror(yyscanner, lex_env, "illegal escape sequence"); + yyterminate(); + } + */ +} + + +\] { + + // End of character class. + + BEGIN(INITIAL); + return _CLASS_; +} + + + +(\\x{hex_digit}{2}|\\.|[^\\])\-[^]] { + + // A range inside a character class. + // [abc0-9] + // ^- matching here +} + + +\\w { +} + + +\\W { +} + + +\\s { +} + + +\\S { +} + + +\\d { +} + + +\\D { +} + + +\\ { +} + + +. { + + /* + if (yytext[0] >= 32 && yytext[0] < 127) + { + // A character class (i.e: [0-9a-f]) is represented by a 256-bits vector, + // here we set to 1 the vector's bit corresponding to the input character. + + LEX_ENV->re_class.bitmap[yytext[0] / 8] |= 1 << yytext[0] % 8; + } + else + { + yyerror(yyscanner, lex_env, "non-ascii character"); + yyterminate(); + } + */ +} + + +<> { + + // End of regexp reached while scanning a character class. + + /* + yyerror(yyscanner, lex_env, "missing terminating ] for character class"); + yyterminate(); + */ +} + + +. 
{ + + /* + if (yytext[0] >= 32 && yytext[0] < 127) + { + return yytext[0]; + } + else + { + yyerror(yyscanner, lex_env, "non-ascii character"); + yyterminate(); + } + */ +} + + +<> { + +} + +%% diff --git a/tests/duplicate_test.go b/tests/duplicate_test.go new file mode 100644 index 0000000..1ebd593 --- /dev/null +++ b/tests/duplicate_test.go @@ -0,0 +1,105 @@ +package tests + +import ( + "strings" + "testing" +) + +func TestDuplicateRules(t *testing.T) { + const rs = `rule dup { +condition: + true +} + +rule dup { +condition: + false +}` + _, err := parseRuleStr(rs) + if err == nil { + t.Fatalf(`Parsing succeeded; should have failed`) + } else if !strings.Contains(strings.ToLower(err.Error()), "duplicate") { + t.Fatalf(`Error did not mention "duplicate": %s`, err) + } +} + +func TestDuplicateMeta(t *testing.T) { + const rs = `rule dup { +meta: + description = "d1" + description = "d2" + description = 5 + description = "d1" +condition: + true +}` + ruleset, err := parseRuleStr(rs) + if err != nil { + t.Fatalf(`Failed to parse ruleset w/ duplicate metas: %s`, err) + } + + const nrules = 1 + if l := len(ruleset.Rules); l != nrules { + t.Fatalf(`Expected %d rules; found %d`, nrules, l) + } + + var ( + rule = ruleset.Rules[0] + key = "description" + nvals = len(rule.Meta) + ) + const expectedVals = 4 + + if nvals != expectedVals { + t.Fatalf(`Expected %d metas; found %d`, expectedVals, nvals) + } + + for _, meta := range rule.Meta { + if meta.Key != key { + t.Errorf(`Expected all meta keys to be "%s"; found "%s"`, key, meta.Key) + } + } +} + +func TestDuplicateStrings(t *testing.T) { + const rs = `rule dup { +strings: + $s1 = "abc" + $s1 = "def" +condition: + any of them +}` + _, err := parseRuleStr(rs) + if err == nil { + t.Fatalf(`Parsing succeeded; should have failed`) + } else if !strings.Contains(err.Error(), "duplicate") { + t.Fatalf(`Error did not mention "duplicate": %s`, err) + } +} + +func TestDuplicateStringsAnonymous(t *testing.T) { + const rs = `rule dup { +strings: + $ = "abc" + $ = "def" +condition: + any of them +}` + _, err := parseRuleStr(rs) + if err != nil { + t.Fatalf(`Failed to parse: %s`, err) + } +} + +func TestDuplicateTags(t *testing.T) { + const rs = `rule dup : tag1 tag2 tag3 tag1 { +condition: + true +}` + _, err := parseRuleStr(rs) + if err == nil { + t.Fatalf(`Parsing succeeded; should have failed`) + } else if !strings.Contains(err.Error(), "duplicate") { + t.Fatalf(`Error did not mention "duplicate": %s`, err) + } +} diff --git a/tests/for_test.go b/tests/for_test.go new file mode 100644 index 0000000..416a663 --- /dev/null +++ b/tests/for_test.go @@ -0,0 +1,19 @@ +package tests + +import "testing" + +func TestForLoop(t *testing.T) { + const rs = `rule FOR { +strings: + $s1 = "abc" +condition: + for any i in (1..#s1) : + ( + @s1[i] > 20 + ) +}` + _, err := parseRuleStr(rs) + if err != nil { + t.Fatalf(`Parsing failed: %s`, err) + } +} diff --git a/tests/grammar_test.go b/tests/grammar_test.go new file mode 100644 index 0000000..3bbf587 --- /dev/null +++ b/tests/grammar_test.go @@ -0,0 +1,159 @@ +package tests + +import ( + "log" + "os" + "strings" + "testing" + + "github.com/VirusTotal/go-yara-parser/data" + "github.com/VirusTotal/go-yara-parser/grammar" +) + +const testfile = "ruleset.yar" + +var ruleset *data.RuleSet + +func init() { + f, err := os.Open(testfile) + if err != nil { + log.Fatalf(`Unable to open ruleset file "%s": %s`, testfile, err) + } + rs, err := grammar.Parse(f, os.Stderr) + if err != nil { + log.Fatalf(`Unable to parse ruleset file "%s": 
%s`, testfile, err) + } + + ruleset = &rs +} + +// TestRuleNames verifies rule names are being collected +func TestRuleNames(t *testing.T) { + + const ruleName = "BASIC_BOOL" + + for _, rule := range ruleset.Rules { + if rule.Identifier == ruleName { + return + } + } + + t.Fatalf(`Ruleset "%s" has no rule named "%s"`, testfile, ruleName) +} + +// TestImport verifies imports are being collected +func TestImport(t *testing.T) { + + const i = 1 + if l := len(ruleset.Imports); l < i { + t.Fatalf("Expected > %d imports in file %s; found %d", i, testfile, l) + } +} + +// TestString verifies that strings are being collected +func TestString(t *testing.T) { + + const ( + ruleName = "STRING1" + stringID = "$s1" + ) + for _, rule := range ruleset.Rules { + if rule.Identifier == ruleName { + for _, s := range rule.Strings { + if s.ID == stringID { + return + } + } + t.Fatalf(`Ruleset "%s" rule "%s" has no string "%s"`, + testfile, ruleName, stringID) + } + } + + t.Fatalf(`Ruleset "%s" has no rule "%s" with string "%s"`, + testfile, ruleName, stringID) +} + +// TestGlobal verifies that the global modifier is being collected +func TestGlobal(t *testing.T) { + + const ruleName = "GLOBAL" + for _, rule := range ruleset.Rules { + if rule.Identifier == ruleName { + if rule.Modifiers.Global { + return + } + t.Fatalf(`Ruleset "%s" contains rule "%s" which is not global`, + testfile, ruleName) + } + } + + t.Fatalf(`Ruleset "%s" has no rule "%s"`, testfile, ruleName) +} + +// TestPrivate verifies that the private modifier is being collected +func TestPrivate(t *testing.T) { + + const ruleName = "PRIVATE" + for _, rule := range ruleset.Rules { + if rule.Identifier == ruleName { + if rule.Modifiers.Private { + return + } + t.Fatalf(`Ruleset "%s" contains rule "%s" which is not private`, + testfile, ruleName) + } + } + + t.Fatalf(`Ruleset "%s" has no rule "%s"`, testfile, ruleName) +} + +// TestMeta verifies that metadata is being collected +func TestMeta(t *testing.T) { + + const ruleName = "META" + for _, rule := range ruleset.Rules { + if rule.Identifier == ruleName { + checklist := make(map[string]bool) + for _, kvp := range rule.Meta { + checklist[kvp.Key] = true + } + + expecteds := []string{ + "meta_str", "meta_int", "meta_neg", "meta_true", "meta_false", + } + + for _, expected := range expecteds { + if !checklist[expected] { + t.Errorf(`Ruleset "%s" rule "%s" missing expected meta "%s"`, + testfile, rule.Identifier, expected) + } + } + return + } + } + + t.Fatalf(`Ruleset "%s" has no rule "%s"`, testfile, ruleName) +} + +// TestXor verifies that the xor string modifier works +func TestXor(t *testing.T) { + const ruleName = "XOR" + for _, rule := range ruleset.Rules { + if rule.Identifier == ruleName { + for _, s := range rule.Strings { + const strNamePrefix = "$xor" + if strings.HasPrefix(s.ID, strNamePrefix) { + if !s.Modifiers.Xor { + t.Errorf(`Ruleset "%s" rule "%s" string "%s" xor modifier not found`, + testfile, rule.Identifier, s.ID) + } + } else { + if s.Modifiers.Xor { + t.Errorf(`Ruleset "%s" rule "%s" string "%s" has unexpected xor modifier`, + testfile, rule.Identifier, s.ID) + } + } + } + } + } +} diff --git a/tests/modules_test.go b/tests/modules_test.go new file mode 100644 index 0000000..8cf0e24 --- /dev/null +++ b/tests/modules_test.go @@ -0,0 +1,20 @@ +package tests + +import "testing" + +func TestPEModule(t *testing.T) { + const rs = `import "pe" + +rule is_pe : pe_tag { +meta: + description = "Uses pe module to determine if file is PE" +strings: + $s1 = "MZ" +condition: + $s1 at 0 and 
pe.imports("kernel32.dll") +}` + _, err := parseRuleStr(rs) + if err != nil { + t.Fatalf("Parsing failed: %s", err) + } +} diff --git a/tests/ruleset.yar b/tests/ruleset.yar new file mode 100644 index 0000000..da7b118 --- /dev/null +++ b/tests/ruleset.yar @@ -0,0 +1,102 @@ +include "./true.yar" + +import "pe" +import "math" + +rule BASIC_BOOL { +condition: + true +} + +rule BASIC_BOOL2 { +condition: + false +} + +rule HEX_STRING { +strings: + $h1 = {01 23 45 67 89 ab} + $h2 = {cd ef 01 23 45 67} +condition: + any of ($h*) +} + +rule REGEX1 { +strings: + $r1 = /first regex/ +condition: + $r1 +} + +rule REGEX2 { +strings: + $r1 = /regex with mod i/i + $r2 = /regex with mod s/s +condition: + $r1 + or $r2 +} + +rule STRING1 { +strings: + $s1 = "ABCDEFG" +condition: + $s1 +} + +rule STRING2 { +strings: + $s1 = "ABCDEFG" + $s2 = "HIJKLMN" +condition: + $s1 or $s2 +} + +rule TAG : tag1 { +condition: + true +} + +rule TAG_STRING : tag2 { +strings: + $s1 = "ABCDEFG" +condition: + $s1 +} + +rule TAGS : tag1 tag2 tag3 { +condition: + true +} + +global rule GLOBAL { +condition: + true +} + +private rule PRIVATE { +condition: + true +} + +rule META { +meta: + meta_str = "string metadata" + meta_int = 42 + meta_neg = -42 + meta_true = true + meta_false = false +condition: + true +} + +rule XOR { +strings: + $xor1 = "xor!" xor + $xor2 = "xor?" nocase xor + $xor3 = /xor_/ xor + $no_xor1 = "no xor :(" wide + $no_xor2 = "no xor >:(" ascii nocase +condition: + any of them +} \ No newline at end of file diff --git a/tests/test.go b/tests/test.go new file mode 100644 index 0000000..296c5ce --- /dev/null +++ b/tests/test.go @@ -0,0 +1,31 @@ +package tests + +import ( + "bytes" + "io" + "os" + "testing" + + "github.com/VirusTotal/go-yara-parser/data" + "github.com/VirusTotal/go-yara-parser/grammar" +) + +// These are just utilities + +func openTestFile(t *testing.T, fname string) io.Reader { + f, err := os.Open(fname) + if err != nil { + t.Fatalf(`Couldn't open file "%s"`, fname) + } + return f +} + +func parseTestFile(t *testing.T, fname string) (data.RuleSet, error) { + f := openTestFile(t, fname) + return grammar.Parse(f, os.Stderr) +} + +func parseRuleStr(s string) (data.RuleSet, error) { + buf := bytes.NewBufferString(s) + return grammar.Parse(buf, os.Stderr) +} diff --git a/tests/unterminated_test.go b/tests/unterminated_test.go new file mode 100644 index 0000000..f77ea4b --- /dev/null +++ b/tests/unterminated_test.go @@ -0,0 +1,44 @@ +package tests + +import ( + "strings" + "testing" +) + +// TestUnterminatedString tests for a rule with an unterminated string +func TestUnterminatedString(t *testing.T) { + const rs = `rule unterminated_string { +meta: + description = "String missing a closing quote" +strings: + $s1 = "abcdefg +condition: + any of them +}` + _, err := parseRuleStr(rs) + unterminatedChecker(t, err) +} + +// TestUnterminatedRegex tests for a rule with an unterminated regex +func TestUnterminatedRegex(t *testing.T) { + const rs = `rule unterminated_regex { +meta: + description = "regex missing a closing slash" +strings: + $r1 = /abcdefg +condition: + any of them +}` + _, err := parseRuleStr(rs) + unterminatedChecker(t, err) +} + +// util func for checking an expected error for the word "unterminated" +func unterminatedChecker(t *testing.T, err error) { + if err == nil { + t.Fatalf("Error should not have been nil") + } + if !strings.Contains(err.Error(), "unterminated") { + t.Fatalf("Error other than unterminated string/regex: %s", err) + } +}