From e86d5a73da637ecc50ad74fb11e49993442ce622 Mon Sep 17 00:00:00 2001 From: Peter Stace Date: Fri, 11 Jun 2021 13:44:13 +1000 Subject: [PATCH] Add Fuzz testing for WKT parsing --- geom/.gitignore | 1 + geom/wkt_fuzz_test.go | 56 +++++++++++++++++ internal/cmprefimpl/cmpgeos/extract_source.go | 53 ---------------- internal/cmprefimpl/cmpgeos/main.go | 3 +- internal/cmprefimpl/cmppg/fuzz_test.go | 59 ++---------------- .../extract/extract_strings_from_source.go | 60 +++++++++++++++++++ 6 files changed, 124 insertions(+), 108 deletions(-) create mode 100644 geom/.gitignore create mode 100644 geom/wkt_fuzz_test.go create mode 100644 internal/extract/extract_strings_from_source.go diff --git a/geom/.gitignore b/geom/.gitignore new file mode 100644 index 00000000..d383c56f --- /dev/null +++ b/geom/.gitignore @@ -0,0 +1 @@ +testdata diff --git a/geom/wkt_fuzz_test.go b/geom/wkt_fuzz_test.go new file mode 100644 index 00000000..f4ade937 --- /dev/null +++ b/geom/wkt_fuzz_test.go @@ -0,0 +1,56 @@ +// +build gofuzzbeta + +package geom_test + +import ( + "strings" + "testing" + "time" + + "github.com/peterstace/simplefeatures/geom" + "github.com/peterstace/simplefeatures/internal/extract" +) + +func FuzzParseUnmarshalWKT(f *testing.F) { + corpus, err := extract.StringsFromSource("..") + if err != nil { + f.Fatalf("could not build corpus: %v", err) + } + for _, str := range corpus { + if allowInCorpus(str) { + f.Add(str) + f.Log(str) + } + } + + f.Fuzz(func(t *testing.T, wkt string) { + done := make(chan struct{}) + go func() { + geom.UnmarshalWKT(wkt, geom.DisableAllValidations) + close(done) + }() + select { + case <-done: + // do nothing + case <-time.After(100 * time.Millisecond): + t.Fatal("timed out") + } + }) +} + +func allowInCorpus(s string) bool { + for _, prefix := range []string{ + "POINT", + "MULTIPOINT", + "LINESTRING", + "MULTILINESTRING", + "POLYGON", + "MULTIPOLYGON", + "GEOMETRYCOLLECTION", + } { + if strings.HasPrefix(s, prefix) { + return true + } + } + 
return false +} diff --git a/internal/cmprefimpl/cmpgeos/extract_source.go b/internal/cmprefimpl/cmpgeos/extract_source.go index e802c5dd..964c8ecb 100644 --- a/internal/cmprefimpl/cmpgeos/extract_source.go +++ b/internal/cmprefimpl/cmpgeos/extract_source.go @@ -2,64 +2,11 @@ package main import ( "errors" - "fmt" - "go/ast" - "go/parser" - "go/token" - "os" - "path/filepath" - "sort" "strconv" - "strings" "github.com/peterstace/simplefeatures/geom" ) -func extractStringsFromSource(dir string) ([]string, error) { - var strs []string - if err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - if !info.IsDir() || strings.Contains(path, ".git") { - return nil - } - pkgs, err := parser.ParseDir(new(token.FileSet), path, nil, 0) - if err != nil { - return err - } - for _, pkg := range pkgs { - ast.Inspect(pkg, func(n ast.Node) bool { - lit, ok := n.(*ast.BasicLit) - if !ok || lit.Kind != token.STRING { - return true - } - unquoted, err := strconv.Unquote(lit.Value) - if !ok { - // Shouldn't ever happen because we've validated that it's a string literal. 
- panic(fmt.Sprintf("could not unquote string '%s'from ast: %v", lit.Value, err)) - } - strs = append(strs, unquoted) - return true - }) - } - return nil - }); err != nil { - return nil, err - } - - strSet := map[string]struct{}{} - for _, s := range strs { - strSet[strings.TrimSpace(s)] = struct{}{} - } - strs = strs[:0] - for s := range strSet { - strs = append(strs, s) - } - sort.Strings(strs) - return strs, nil -} - func convertToGeometries(candidates []string) ([]geom.Geometry, error) { var geoms []geom.Geometry for _, c := range candidates { diff --git a/internal/cmprefimpl/cmpgeos/main.go b/internal/cmprefimpl/cmpgeos/main.go index 8daab000..adeea743 100644 --- a/internal/cmprefimpl/cmpgeos/main.go +++ b/internal/cmprefimpl/cmpgeos/main.go @@ -10,6 +10,7 @@ import ( "strings" "github.com/peterstace/simplefeatures/geom" + "github.com/peterstace/simplefeatures/internal/extract" ) // TODO: These are additional geometries. Needs something a bit more robust... @@ -24,7 +25,7 @@ func main() { if err != nil { log.Fatalf("could not get working dir: %v", err) } - candidates, err := extractStringsFromSource(dir) + candidates, err := extract.StringsFromSource(dir) if err != nil { log.Fatalf("could not extract strings from src: %v", err) } diff --git a/internal/cmprefimpl/cmppg/fuzz_test.go b/internal/cmprefimpl/cmppg/fuzz_test.go index 7d5cb7c6..4024cc98 100644 --- a/internal/cmprefimpl/cmppg/fuzz_test.go +++ b/internal/cmprefimpl/cmppg/fuzz_test.go @@ -3,23 +3,19 @@ package main import ( "database/sql" "fmt" - "go/ast" - "go/parser" - "go/token" - "os" - "path/filepath" - "sort" - "strconv" - "strings" "testing" _ "github.com/lib/pq" "github.com/peterstace/simplefeatures/geom" + "github.com/peterstace/simplefeatures/internal/extract" ) func TestFuzz(t *testing.T) { pg := setupDB(t) - candidates := extractStringsFromSource(t) + candidates, err := extract.StringsFromSource("../../..") + if err != nil { + t.Fatalf("could not extract strings from source: %v", err) + } 
CheckWKTParse(t, pg, candidates) CheckWKBParse(t, pg, candidates) @@ -72,51 +68,6 @@ func setupDB(t *testing.T) PostGIS { return PostGIS{db} } -func extractStringsFromSource(t *testing.T) []string { - var strs []string - if err := filepath.Walk("../../..", func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - if !info.IsDir() || strings.Contains(path, ".git") { - return nil - } - pkgs, err := parser.ParseDir(new(token.FileSet), path, nil, 0) - if err != nil { - return err - } - for _, pkg := range pkgs { - ast.Inspect(pkg, func(n ast.Node) bool { - lit, ok := n.(*ast.BasicLit) - if !ok || lit.Kind != token.STRING { - return true - } - unquoted, err := strconv.Unquote(lit.Value) - if !ok { - // Shouldn't ever happen because we've validated that it's a string literal. - panic(fmt.Sprintf("could not unquote string '%s'from ast: %v", lit.Value, err)) - } - strs = append(strs, unquoted) - return true - }) - } - return nil - }); err != nil { - t.Fatal(err) - } - - strSet := map[string]struct{}{} - for _, s := range strs { - strSet[strings.TrimSpace(s)] = struct{}{} - } - strs = strs[:0] - for s := range strSet { - strs = append(strs, s) - } - sort.Strings(strs) - return strs -} - func convertToGeometries(t *testing.T, candidates []string) []geom.Geometry { var geoms []geom.Geometry for _, c := range candidates { diff --git a/internal/extract/extract_strings_from_source.go b/internal/extract/extract_strings_from_source.go new file mode 100644 index 00000000..73128226 --- /dev/null +++ b/internal/extract/extract_strings_from_source.go @@ -0,0 +1,60 @@ +package extract + +import ( + "fmt" + "go/ast" + "go/parser" + "go/token" + "os" + "path/filepath" + "sort" + "strconv" + "strings" +) + +// StringsFromSource parses the Go files (recursively) contained in the given +// dir, and returns any string literals contained therein. 
// The literals are unquoted, trimmed of surrounding whitespace, de-duplicated,
// and returned in sorted order. Directories named ".git" are not descended
// into. An error is returned if walking dir fails or if any visited directory
// contains Go source that cannot be parsed.
func StringsFromSource(dir string) ([]string, error) {
	var strs []string

	// collect appends every string literal found beneath a single AST root.
	collect := func(root ast.Node) {
		ast.Inspect(root, func(n ast.Node) bool {
			lit, ok := n.(*ast.BasicLit)
			if !ok || lit.Kind != token.STRING {
				return true
			}
			unquoted, err := strconv.Unquote(lit.Value)
			if err != nil {
				// Shouldn't ever happen because the parser has already
				// validated that this is a well-formed string literal.
				panic(fmt.Sprintf("could not unquote string '%s' from AST: %v", lit.Value, err))
			}
			strs = append(strs, unquoted)
			return true
		})
	}

	walkFn := func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if !info.IsDir() {
			// Files are parsed via their containing directory.
			return nil
		}
		if info.Name() == ".git" {
			// Match on the directory's own name rather than the whole path,
			// so that a checkout living under a path that merely contains
			// ".git" is still scanned, and prune the subtree instead of
			// visiting every entry inside it.
			return filepath.SkipDir
		}
		pkgs, err := parser.ParseDir(token.NewFileSet(), path, nil, 0)
		if err != nil {
			return err
		}
		for _, pkg := range pkgs {
			collect(pkg)
		}
		return nil
	}
	if err := filepath.Walk(dir, walkFn); err != nil {
		return nil, err
	}

	// Trim and de-duplicate in place (the write index never overtakes the
	// read index), then sort so the result is deterministic.
	seen := make(map[string]struct{}, len(strs))
	uniq := strs[:0]
	for _, s := range strs {
		s = strings.TrimSpace(s)
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		uniq = append(uniq, s)
	}
	sort.Strings(uniq)
	return uniq, nil
}