From 61a3731f6f3d5a4176186c73706c225ef240659a Mon Sep 17 00:00:00 2001 From: Rex P <106129829+another-rex@users.noreply.github.com> Date: Thu, 9 Jan 2025 13:04:12 +1100 Subject: [PATCH] feat: Update local matcher to the new requirements for resolve (#1481) Sets up logging to only return one error log per database that fails to load, and also adds a LoadEcosystem function to preload/preerror databases imodels.go part can be safely ignored as that is removed in a followup PR. --- cmd/osv-scanner/__snapshots__/main_test.snap | 12 +++- .../clientimpl/localmatcher/localmatcher.go | 41 ++++++------- .../clientimpl/osvmatcher/osvmatcher.go | 2 +- .../clientinterfaces/vulnerabilitymatcher.go | 2 +- internal/imodels/ecosystem/ecosystem.go | 6 ++ internal/imodels/imodels.go | 57 ++++++++++++------- pkg/osvscanner/osvscanner.go | 2 +- 7 files changed, 74 insertions(+), 48 deletions(-) diff --git a/cmd/osv-scanner/__snapshots__/main_test.snap b/cmd/osv-scanner/__snapshots__/main_test.snap index f118e8da57..9482c9aa8c 100755 --- a/cmd/osv-scanner/__snapshots__/main_test.snap +++ b/cmd/osv-scanner/__snapshots__/main_test.snap @@ -2404,7 +2404,11 @@ Filtered 1 local/unscannable package/s from the scan. --- [TestRun_LocalDatabases_AlwaysOffline/a_bunch_of_different_lockfiles_and_ecosystem - 2] -could not find local databases for ecosystems: Alpine, Packagist, PyPI, RubyGems, npm +could not load db for PyPI ecosystem: unable to fetch OSV database: no offline version of the OSV database is available +could not load db for RubyGems ecosystem: unable to fetch OSV database: no offline version of the OSV database is available +could not load db for Alpine ecosystem: unable to fetch OSV database: no offline version of the OSV database is available +could not load db for Packagist ecosystem: unable to fetch OSV database: no offline version of the OSV database is available +could not load db for npm ecosystem: unable to fetch OSV database: no offline version of the OSV database is available --- @@ -2426,7 +2430,11 @@ Filtered 1 local/unscannable package/s from the scan. --- [TestRun_LocalDatabases_AlwaysOffline/a_bunch_of_different_lockfiles_and_ecosystem - 4] -could not find local databases for ecosystems: Alpine, Packagist, PyPI, RubyGems, npm +could not load db for PyPI ecosystem: unable to fetch OSV database: no offline version of the OSV database is available +could not load db for RubyGems ecosystem: unable to fetch OSV database: no offline version of the OSV database is available +could not load db for Alpine ecosystem: unable to fetch OSV database: no offline version of the OSV database is available +could not load db for Packagist ecosystem: unable to fetch OSV database: no offline version of the OSV database is available +could not load db for npm ecosystem: unable to fetch OSV database: no offline version of the OSV database is available --- diff --git a/internal/clients/clientimpl/localmatcher/localmatcher.go b/internal/clients/clientimpl/localmatcher/localmatcher.go index 69ce879fd5..4f0ce460ef 100644 --- a/internal/clients/clientimpl/localmatcher/localmatcher.go +++ b/internal/clients/clientimpl/localmatcher/localmatcher.go @@ -6,8 +6,6 @@ import ( "fmt" "os" "path" - "slices" - "strings" "github.com/google/osv-scalibr/extractor" "github.com/google/osv-scanner/internal/imodels" @@ -26,6 +24,8 @@ type LocalMatcher struct { dbBasePath string dbs map[osvschema.Ecosystem]*ZipDB downloadDB bool + // failedDBs keeps track of the errors when getting databases for each ecosystem + failedDBs map[osvschema.Ecosystem]error // TODO(v2 logging): Remove this reporter r reporter.Reporter } @@ -41,15 +41,13 @@ func NewLocalMatcher(r reporter.Reporter, localDBPath string, downloadDB bool) ( dbs: make(map[osvschema.Ecosystem]*ZipDB), downloadDB: downloadDB, r: r, + failedDBs: make(map[osvschema.Ecosystem]error), }, nil } -func (matcher *LocalMatcher) Match(ctx context.Context, invs []*extractor.Inventory) ([][]*models.Vulnerability, error) { +func (matcher *LocalMatcher) MatchVulnerabilities(ctx context.Context, invs []*extractor.Inventory) ([][]*models.Vulnerability, error) { results := make([][]*models.Vulnerability, 0, len(invs)) - // slice to track ecosystems that did not have an offline database available - var missingDBs []string - for _, inv := range invs { if ctx.Err() != nil { return nil, ctx.Err() @@ -73,31 +71,21 @@ func (matcher *LocalMatcher) Match(ctx context.Context, invs []*extractor.Invent db, err := matcher.loadDBFromCache(ctx, pkg.Ecosystem) if err != nil { - if errors.Is(err, ErrOfflineDatabaseNotFound) { - missingDBs = append(missingDBs, string(pkg.Ecosystem.Ecosystem)) - } else { - // TODO(V2 logging): - // the most likely error at this point is that the PURL could not be parsed - matcher.r.Errorf("could not load db for %s ecosystem: %v\n", pkg.Ecosystem, err) - } - - results = append(results, []*models.Vulnerability{}) - continue } results = append(results, db.VulnerabilitiesAffectingPackage(pkg)) } - if len(missingDBs) > 0 { - missingDBs = slices.Compact(missingDBs) - slices.Sort(missingDBs) + return results, nil +} - // TODO(v2 logging): - matcher.r.Errorf("could not find local databases for ecosystems: %s\n", strings.Join(missingDBs, ", ")) - } +// LoadEcosystem tries to preload the ecosystem into the cache, and returns an error if the ecosystem +// cannot be loaded. +func (matcher *LocalMatcher) LoadEcosystem(ctx context.Context, ecosystem ecosystem.Parsed) error { + _, err := matcher.loadDBFromCache(ctx, ecosystem) - return results, nil + return err } func (matcher *LocalMatcher) loadDBFromCache(ctx context.Context, ecosystem ecosystem.Parsed) (*ZipDB, error) { @@ -105,9 +93,16 @@ func (matcher *LocalMatcher) loadDBFromCache(ctx context.Context, ecosystem ecos return db, nil } + if matcher.failedDBs[ecosystem.Ecosystem] != nil { + return nil, matcher.failedDBs[ecosystem.Ecosystem] + } + db, err := NewZippedDB(ctx, matcher.dbBasePath, string(ecosystem.Ecosystem), fmt.Sprintf("%s/%s/all.zip", zippedDBRemoteHost, ecosystem.Ecosystem), !matcher.downloadDB) if err != nil { + matcher.failedDBs[ecosystem.Ecosystem] = err + matcher.r.Errorf("could not load db for %s ecosystem: %v\n", ecosystem.Ecosystem, err) + return nil, err } diff --git a/internal/clients/clientimpl/osvmatcher/osvmatcher.go b/internal/clients/clientimpl/osvmatcher/osvmatcher.go index 8c67053221..731c7c73ea 100644 --- a/internal/clients/clientimpl/osvmatcher/osvmatcher.go +++ b/internal/clients/clientimpl/osvmatcher/osvmatcher.go @@ -30,7 +30,7 @@ type OSVMatcher struct { InitialQueryTimeout time.Duration } -func (matcher *OSVMatcher) Match(ctx context.Context, pkgs []*extractor.Inventory) ([][]*models.Vulnerability, error) { +func (matcher *OSVMatcher) MatchVulnerabilities(ctx context.Context, pkgs []*extractor.Inventory) ([][]*models.Vulnerability, error) { var batchResp *osvdev.BatchedResponse deadlineExceeded := false diff --git a/internal/clients/clientinterfaces/vulnerabilitymatcher.go b/internal/clients/clientinterfaces/vulnerabilitymatcher.go index 68f16ab17d..300cb56a9e 100644 --- a/internal/clients/clientinterfaces/vulnerabilitymatcher.go +++ b/internal/clients/clientinterfaces/vulnerabilitymatcher.go @@ -8,5 +8,5 @@ import ( ) type VulnerabilityMatcher interface { - Match(ctx context.Context, invs []*extractor.Inventory) ([][]*models.Vulnerability, error) + MatchVulnerabilities(ctx context.Context, invs []*extractor.Inventory) ([][]*models.Vulnerability, error) } diff --git a/internal/imodels/ecosystem/ecosystem.go b/internal/imodels/ecosystem/ecosystem.go index 8fe7d86e49..677ef70dc0 100644 --- a/internal/imodels/ecosystem/ecosystem.go +++ b/internal/imodels/ecosystem/ecosystem.go @@ -94,6 +94,12 @@ func MustParse(str string) Parsed { // Parse parses a string into a constants.Ecosystem and an optional suffix specified with a ":" func Parse(str string) (Parsed, error) { + // Special case to return an empty ecosystem if str is empty + // This is not considered an error. + if str == "" { + return Parsed{}, nil + } + ecosystem, suffix, _ := strings.Cut(str, ":") // Always return the full parsed value even if it might be invalid diff --git a/internal/imodels/imodels.go b/internal/imodels/imodels.go index e936ac010d..eafaf49356 100644 --- a/internal/imodels/imodels.go +++ b/internal/imodels/imodels.go @@ -12,6 +12,7 @@ import ( "github.com/google/osv-scanner/internal/imodels/ecosystem" "github.com/google/osv-scanner/internal/scalibrextract/vcs/gitrepo" "github.com/google/osv-scanner/pkg/models" + "github.com/ossf/osv-schema/bindings/go/osvschema" scalibrosv "github.com/google/osv-scalibr/extractor/filesystem/osv" ) @@ -68,49 +69,35 @@ func FromInventory(inventory *extractor.Inventory) PackageInfo { Location: inventory.Locations[0], AdditionalLocations: inventory.Locations[1:], OriginalInventory: inventory, - // TODO: SourceType } - // Ignore this error for now as we can't do too much about an unknown ecosystem + // Set Ecosystem eco, err := ecosystem.Parse(inventory.Ecosystem()) if err != nil { + // Ignore this error for now as we can't do too much about an unknown ecosystem // TODO(v2): Replace with slog log.Printf("Warning: %s\n", err.Error()) } - pkgInfo.Ecosystem = eco + // Set Commit and Repository if inventory.SourceCode != nil { pkgInfo.Commit = inventory.SourceCode.Commit pkgInfo.Repository = inventory.SourceCode.Repo } + // Set DepGroups if dg, ok := inventory.Metadata.(scalibrosv.DepGroups); ok { pkgInfo.DepGroups = dg.DepGroups() } + + // Set SourceType if inventory.Extractor != nil { extractorName := inventory.Extractor.Name() if _, ok := osExtractors[extractorName]; ok { pkgInfo.SourceType = SourceTypeOSPackage } else if _, ok := sbomExtractors[extractorName]; ok { pkgInfo.SourceType = SourceTypeSBOM - - // TODO (V2): SBOMs have a special case where we manually convert the PURL here - // instead while PURL to ESI conversion is not complete - purl := inventory.Extractor.ToPURL(inventory) - - if purl != nil { - // Error should never happen here since the PURL is from an already parsed purl - pi, _ := models.PURLToPackage(purl.String()) - pkgInfo.Name = pi.Name - pkgInfo.Version = pi.Version - parsed, err := ecosystem.Parse(pi.Ecosystem) - if err != nil { - // TODO: Replace with slog - log.Printf("Warning, found unexpected ecosystem in purl %q, likely will not return any results for this package.\n", purl.String()) - } - pkgInfo.Ecosystem = parsed - } } else if _, ok := gitExtractors[extractorName]; ok { pkgInfo.SourceType = SourceTypeGit } else { @@ -118,6 +105,36 @@ func FromInventory(inventory *extractor.Inventory) PackageInfo { } } + // --- General Patching --- + + // Scalibr uses go to indicate go compiler version + // We specifically cares about the stdlib version inside the package + // so convert the package name from go to stdlib + if eco.Ecosystem == osvschema.EcosystemGo && inventory.Name == "go" { + pkgInfo.Name = "stdlib" + } + + // TODO (V2): SBOMs have a special case where we manually convert the PURL here + // instead while PURL to ESI conversion is not complete + if pkgInfo.SourceType == SourceTypeSBOM { + purl := inventory.Extractor.ToPURL(inventory) + if purl != nil { + // Error should never happen here since the PURL is from an already parsed purl + pi, _ := models.PURLToPackage(purl.String()) + pkgInfo.Name = pi.Name + pkgInfo.Version = pi.Version + parsed, err := ecosystem.Parse(pi.Ecosystem) + if err != nil { + // TODO: Replace with slog + log.Printf("Warning, found unexpected ecosystem in purl %q, likely will not return any results for this package.\n", purl.String()) + } + pkgInfo.Ecosystem = parsed + } + } + + // --- Metadata Patching --- + + // Depending on the specific metadata types set fields in package info. if metadata, ok := inventory.Metadata.(*apk.Metadata); ok { pkgInfo.OSPackageName = metadata.PackageName } else if metadata, ok := inventory.Metadata.(*dpkg.Metadata); ok { diff --git a/pkg/osvscanner/osvscanner.go b/pkg/osvscanner/osvscanner.go index 3d94ea7821..85477485c8 100644 --- a/pkg/osvscanner/osvscanner.go +++ b/pkg/osvscanner/osvscanner.go @@ -207,7 +207,7 @@ func makeRequestWithMatcher( invs = append(invs, pkgs.PackageInfo.OriginalInventory) } - res, err := matcher.Match(context.Background(), invs) + res, err := matcher.MatchVulnerabilities(context.Background(), invs) if err != nil { // TODO: Handle error here r.Errorf("error when retrieving vulns: %v", err)