Skip to content

Commit

Permalink
Initial support for ~ operator in hex strings. (VirusTotal#62)
Browse files Browse the repository at this point in the history
* Initial support for ~ operator in hex strings.

This allows gyp to parse the ~ operator in hex strings. It isn't fully
implemented yet as it doesn't work with wildcards, but that will be next on my
list.

* Allow users to define their own flexgo/goyacc/protoc-gen-go

This allows users to build with:

FLEXGO=/Users/wxs/bin/flexgo/bin/flex GOYACC=/Users/wxs/go/bin/goyacc PROTOC=/Users/wxs/go/bin/protoc-gen-go make

If the tools are already in your PATH the build works as before; if they are
installed elsewhere, you can now point make at them with these variables.

* Add support for not operator on wildcards.
  • Loading branch information
wxsBSD authored Dec 22, 2022
1 parent a642c76 commit afbe7ea
Show file tree
Hide file tree
Showing 14 changed files with 861 additions and 720 deletions.
10 changes: 7 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
FLEXGO ?= flexgo
GOYACC ?= goyacc
PROTOC ?= protoc-gen-go

all: proto hexgrammar grammar y2j j2y

grammar:
flexgo -G -v -o parser/lexer.go parser/lexer.l && goyacc -p yr -o parser/parser.go parser/grammar.y
${FLEXGO} -G -v -o parser/lexer.go parser/lexer.l && ${GOYACC} -p yr -o parser/parser.go parser/grammar.y

hexgrammar:
flexgo -G -v -o hex/hex_lexer.go hex/hex_lexer.l && goyacc -p hex -o hex/hex_parser.go hex/hex_grammar.y
${FLEXGO} -G -v -o hex/hex_lexer.go hex/hex_lexer.l && ${GOYACC} -p hex -o hex/hex_parser.go hex/hex_grammar.y

proto:
protoc --go_out=. --go_opt=paths=source_relative pb/yara.proto
protoc --plugin=${PROTOC} --go_out=. --go_opt=paths=source_relative pb/yara.proto

j2y:
go build github.com/VirusTotal/gyp/cmd/j2y
Expand Down
1 change: 1 addition & 0 deletions ast/serialization.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ func hexTokensFromProto(pbTokens *pb.HexTokens) HexTokens {
tokens[i] = &HexBytes{
Bytes: v.Sequence.GetValue(),
Masks: v.Sequence.GetMask(),
Nots: v.Sequence.GetNots(),
}
case *pb.HexToken_Alternative:
alternatives := make(HexTokens, len(v.Alternative.GetTokens()))
Expand Down
19 changes: 12 additions & 7 deletions ast/strings.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,12 @@ type HexJump struct {
// 0F -> The higher nibble is ignored (?X)
// F0 -> The lower nibble is ignored (X?)
// FF -> No wildcard at all.
// Nots holds one boolean per byte: true means the byte is prefixed with ~ in
// the source and must NOT match the given value.
type HexBytes struct {
// Bytes holds the byte values of the sequence, one entry per byte.
Bytes []byte
// Masks holds the wildcard mask for the byte at the same index in Bytes.
// WriteSource accepts only 0x00, 0x0F, 0xF0 and 0xFF and panics otherwise.
Masks []byte
// Nots flags, per index, whether the byte at that index is prefixed with ~.
// WriteSource reads Nots[i] for every index of Bytes, so Nots must not be
// shorter than Bytes.
Nots []bool
}

// HexOr is an HexToken that represents an alternative in the hex string, like
Expand Down Expand Up @@ -311,18 +314,19 @@ func (h *HexString) WriteSource(w io.Writer) (err error) {
// WriteSource writes the node's source into the writer w.
func (h *HexBytes) WriteSource(w io.Writer) error {
for i, b := range h.Bytes {
var s string
var s string = ""
if h.Nots[i] {
s += "~"
}
switch mask := h.Masks[i]; mask {
case 0x00:
s = "?? "
s += "?? "
case 0x0F:
s = fmt.Sprintf("%02X ", b)
s = "?" + s[1:]
s += "?" + fmt.Sprintf("%02X ", b)[1:]
case 0xF0:
s = fmt.Sprintf("%02X", b)
s = s[:1] + "? "
s += fmt.Sprintf("%02X", b)[:1] + "? "
case 0xFF:
s = fmt.Sprintf("%02X ", b)
s += fmt.Sprintf("%02X ", b)
default:
panic(fmt.Errorf(`unexpected byte mask: "%0X"`, mask))
}
Expand Down Expand Up @@ -436,6 +440,7 @@ func (h *HexBytes) AsProto() *pb.BytesSequence {
return &pb.BytesSequence{
Value: h.Bytes,
Mask: h.Masks,
Nots: h.Nots,
}
}

Expand Down
2 changes: 1 addition & 1 deletion gyp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ rule foo {
`
rule foo {
strings:
$a = { 01 02 03 04 ?? AA B? ?C }
$a = { 01 02 ~03 04 ?? AA B? ?C }
condition:
$a
}
Expand Down
8 changes: 8 additions & 0 deletions hex/hex_grammar.y
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,13 @@ const StringChainingThreshold int = 200
// byteWithMask is the semantic value carried by the byte tokens (_BYTE_,
// _NOT_BYTE_ and _MASKED_BYTE_ are all declared with the <bm> type).
type byteWithMask struct {
// Value is the byte value; the bytes rule copies it into ast.HexBytes.Bytes.
Value byte
// Mask is the wildcard mask for Value; the bytes rule copies it into
// ast.HexBytes.Masks.
Mask byte
// Not is copied into ast.HexBytes.Nots by the bytes rule. Presumably the
// lexer sets it for _NOT_BYTE_ tokens (a byte prefixed with ~) — confirm
// against hex_lexer.l.
Not bool
}
%}


%token <bm> _BYTE_
%token <bm> _NOT_BYTE_
%token <bm> _MASKED_BYTE_
%token <integer> _NUMBER_
%token _LBRACE_
Expand Down Expand Up @@ -251,12 +253,14 @@ bytes
$$ = &ast.HexBytes{
Bytes: []byte{$1.Value},
Masks: []byte{$1.Mask},
Nots: []bool{$1.Not},
}
}
| bytes byte
{
$1.Bytes = append($1.Bytes, $2.Value)
$1.Masks = append($1.Masks, $2.Mask)
$1.Nots = append($1.Nots, $2.Not)
}


Expand All @@ -269,6 +273,10 @@ byte
{
$$ = $1
}
| _NOT_BYTE_
{
$$ = $1
}
;

%%
Loading

0 comments on commit afbe7ea

Please sign in to comment.