Skip to content

Commit

Permalink
feat: Update local matcher to the new requirements for resolve (#1481)
Browse files Browse the repository at this point in the history
Sets up logging to only return one error log per database that fails to
load, and also adds a LoadEcosystem function to preload/preerror
databases

imodels.go part can be safely ignored as that is removed in a followup
PR.
  • Loading branch information
another-rex authored Jan 9, 2025
1 parent aebdc23 commit 61a3731
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 48 deletions.
12 changes: 10 additions & 2 deletions cmd/osv-scanner/__snapshots__/main_test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -2404,7 +2404,11 @@ Filtered 1 local/unscannable package/s from the scan.
---

[TestRun_LocalDatabases_AlwaysOffline/a_bunch_of_different_lockfiles_and_ecosystem - 2]
could not find local databases for ecosystems: Alpine, Packagist, PyPI, RubyGems, npm
could not load db for PyPI ecosystem: unable to fetch OSV database: no offline version of the OSV database is available
could not load db for RubyGems ecosystem: unable to fetch OSV database: no offline version of the OSV database is available
could not load db for Alpine ecosystem: unable to fetch OSV database: no offline version of the OSV database is available
could not load db for Packagist ecosystem: unable to fetch OSV database: no offline version of the OSV database is available
could not load db for npm ecosystem: unable to fetch OSV database: no offline version of the OSV database is available

---

Expand All @@ -2426,7 +2430,11 @@ Filtered 1 local/unscannable package/s from the scan.
---

[TestRun_LocalDatabases_AlwaysOffline/a_bunch_of_different_lockfiles_and_ecosystem - 4]
could not find local databases for ecosystems: Alpine, Packagist, PyPI, RubyGems, npm
could not load db for PyPI ecosystem: unable to fetch OSV database: no offline version of the OSV database is available
could not load db for RubyGems ecosystem: unable to fetch OSV database: no offline version of the OSV database is available
could not load db for Alpine ecosystem: unable to fetch OSV database: no offline version of the OSV database is available
could not load db for Packagist ecosystem: unable to fetch OSV database: no offline version of the OSV database is available
could not load db for npm ecosystem: unable to fetch OSV database: no offline version of the OSV database is available

---

Expand Down
41 changes: 18 additions & 23 deletions internal/clients/clientimpl/localmatcher/localmatcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ import (
"fmt"
"os"
"path"
"slices"
"strings"

"github.com/google/osv-scalibr/extractor"
"github.com/google/osv-scanner/internal/imodels"
Expand All @@ -26,6 +24,8 @@ type LocalMatcher struct {
dbBasePath string
dbs map[osvschema.Ecosystem]*ZipDB
downloadDB bool
// failedDBs keeps track of the errors when getting databases for each ecosystem
failedDBs map[osvschema.Ecosystem]error
// TODO(v2 logging): Remove this reporter
r reporter.Reporter
}
Expand All @@ -41,15 +41,13 @@ func NewLocalMatcher(r reporter.Reporter, localDBPath string, downloadDB bool) (
dbs: make(map[osvschema.Ecosystem]*ZipDB),
downloadDB: downloadDB,
r: r,
failedDBs: make(map[osvschema.Ecosystem]error),
}, nil
}

func (matcher *LocalMatcher) Match(ctx context.Context, invs []*extractor.Inventory) ([][]*models.Vulnerability, error) {
func (matcher *LocalMatcher) MatchVulnerabilities(ctx context.Context, invs []*extractor.Inventory) ([][]*models.Vulnerability, error) {
results := make([][]*models.Vulnerability, 0, len(invs))

// slice to track ecosystems that did not have an offline database available
var missingDBs []string

for _, inv := range invs {
if ctx.Err() != nil {
return nil, ctx.Err()
Expand All @@ -73,41 +71,38 @@ func (matcher *LocalMatcher) Match(ctx context.Context, invs []*extractor.Invent
db, err := matcher.loadDBFromCache(ctx, pkg.Ecosystem)

if err != nil {
if errors.Is(err, ErrOfflineDatabaseNotFound) {
missingDBs = append(missingDBs, string(pkg.Ecosystem.Ecosystem))
} else {
// TODO(V2 logging):
// the most likely error at this point is that the PURL could not be parsed
matcher.r.Errorf("could not load db for %s ecosystem: %v\n", pkg.Ecosystem, err)
}

results = append(results, []*models.Vulnerability{})

continue
}

results = append(results, db.VulnerabilitiesAffectingPackage(pkg))
}

if len(missingDBs) > 0 {
missingDBs = slices.Compact(missingDBs)
slices.Sort(missingDBs)
return results, nil
}

// TODO(v2 logging):
matcher.r.Errorf("could not find local databases for ecosystems: %s\n", strings.Join(missingDBs, ", "))
}
// LoadEcosystem tries to preload the ecosystem into the cache, and returns an error if the ecosystem
// cannot be loaded.
func (matcher *LocalMatcher) LoadEcosystem(ctx context.Context, ecosystem ecosystem.Parsed) error {
_, err := matcher.loadDBFromCache(ctx, ecosystem)

return results, nil
return err
}

func (matcher *LocalMatcher) loadDBFromCache(ctx context.Context, ecosystem ecosystem.Parsed) (*ZipDB, error) {
if db, ok := matcher.dbs[ecosystem.Ecosystem]; ok {
return db, nil
}

if matcher.failedDBs[ecosystem.Ecosystem] != nil {
return nil, matcher.failedDBs[ecosystem.Ecosystem]
}

db, err := NewZippedDB(ctx, matcher.dbBasePath, string(ecosystem.Ecosystem), fmt.Sprintf("%s/%s/all.zip", zippedDBRemoteHost, ecosystem.Ecosystem), !matcher.downloadDB)

if err != nil {
matcher.failedDBs[ecosystem.Ecosystem] = err
matcher.r.Errorf("could not load db for %s ecosystem: %v\n", ecosystem.Ecosystem, err)

return nil, err
}

Expand Down
2 changes: 1 addition & 1 deletion internal/clients/clientimpl/osvmatcher/osvmatcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ type OSVMatcher struct {
InitialQueryTimeout time.Duration
}

func (matcher *OSVMatcher) Match(ctx context.Context, pkgs []*extractor.Inventory) ([][]*models.Vulnerability, error) {
func (matcher *OSVMatcher) MatchVulnerabilities(ctx context.Context, pkgs []*extractor.Inventory) ([][]*models.Vulnerability, error) {
var batchResp *osvdev.BatchedResponse
deadlineExceeded := false

Expand Down
2 changes: 1 addition & 1 deletion internal/clients/clientinterfaces/vulnerabilitymatcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ import (
)

type VulnerabilityMatcher interface {
Match(ctx context.Context, invs []*extractor.Inventory) ([][]*models.Vulnerability, error)
MatchVulnerabilities(ctx context.Context, invs []*extractor.Inventory) ([][]*models.Vulnerability, error)
}
6 changes: 6 additions & 0 deletions internal/imodels/ecosystem/ecosystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,12 @@ func MustParse(str string) Parsed {

// Parse parses a string into a constants.Ecosystem and an optional suffix specified with a ":"
func Parse(str string) (Parsed, error) {
// Special case to return an empty ecosystem if str is empty
// This is not considered an error.
if str == "" {
return Parsed{}, nil
}

ecosystem, suffix, _ := strings.Cut(str, ":")

// Always return the full parsed value even if it might be invalid
Expand Down
57 changes: 37 additions & 20 deletions internal/imodels/imodels.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/google/osv-scanner/internal/imodels/ecosystem"
"github.com/google/osv-scanner/internal/scalibrextract/vcs/gitrepo"
"github.com/google/osv-scanner/pkg/models"
"github.com/ossf/osv-schema/bindings/go/osvschema"

scalibrosv "github.com/google/osv-scalibr/extractor/filesystem/osv"
)
Expand Down Expand Up @@ -68,56 +69,72 @@ func FromInventory(inventory *extractor.Inventory) PackageInfo {
Location: inventory.Locations[0],
AdditionalLocations: inventory.Locations[1:],
OriginalInventory: inventory,
// TODO: SourceType
}

// Ignore this error for now as we can't do too much about an unknown ecosystem
// Set Ecosystem
eco, err := ecosystem.Parse(inventory.Ecosystem())
if err != nil {
// Ignore this error for now as we can't do too much about an unknown ecosystem
// TODO(v2): Replace with slog
log.Printf("Warning: %s\n", err.Error())
}

pkgInfo.Ecosystem = eco

// Set Commit and Repository
if inventory.SourceCode != nil {
pkgInfo.Commit = inventory.SourceCode.Commit
pkgInfo.Repository = inventory.SourceCode.Repo
}

// Set DepGroups
if dg, ok := inventory.Metadata.(scalibrosv.DepGroups); ok {
pkgInfo.DepGroups = dg.DepGroups()
}

// Set SourceType
if inventory.Extractor != nil {
extractorName := inventory.Extractor.Name()
if _, ok := osExtractors[extractorName]; ok {
pkgInfo.SourceType = SourceTypeOSPackage
} else if _, ok := sbomExtractors[extractorName]; ok {
pkgInfo.SourceType = SourceTypeSBOM

// TODO (V2): SBOMs have a special case where we manually convert the PURL here
// instead while PURL to ESI conversion is not complete
purl := inventory.Extractor.ToPURL(inventory)

if purl != nil {
// Error should never happen here since the PURL is from an already parsed purl
pi, _ := models.PURLToPackage(purl.String())
pkgInfo.Name = pi.Name
pkgInfo.Version = pi.Version
parsed, err := ecosystem.Parse(pi.Ecosystem)
if err != nil {
// TODO: Replace with slog
log.Printf("Warning, found unexpected ecosystem in purl %q, likely will not return any results for this package.\n", purl.String())
}
pkgInfo.Ecosystem = parsed
}
} else if _, ok := gitExtractors[extractorName]; ok {
pkgInfo.SourceType = SourceTypeGit
} else {
pkgInfo.SourceType = SourceTypeProjectPackage
}
}

// --- General Patching ---

// Scalibr uses go to indicate go compiler version
// We specifically cares about the stdlib version inside the package
// so convert the package name from go to stdlib
if eco.Ecosystem == osvschema.EcosystemGo && inventory.Name == "go" {
pkgInfo.Name = "stdlib"
}

// TODO (V2): SBOMs have a special case where we manually convert the PURL here
// instead while PURL to ESI conversion is not complete
if pkgInfo.SourceType == SourceTypeSBOM {
purl := inventory.Extractor.ToPURL(inventory)
if purl != nil {
// Error should never happen here since the PURL is from an already parsed purl
pi, _ := models.PURLToPackage(purl.String())
pkgInfo.Name = pi.Name
pkgInfo.Version = pi.Version
parsed, err := ecosystem.Parse(pi.Ecosystem)
if err != nil {
// TODO: Replace with slog
log.Printf("Warning, found unexpected ecosystem in purl %q, likely will not return any results for this package.\n", purl.String())
}
pkgInfo.Ecosystem = parsed
}
}

// --- Metadata Patching ---

// Depending on the specific metadata types set fields in package info.
if metadata, ok := inventory.Metadata.(*apk.Metadata); ok {
pkgInfo.OSPackageName = metadata.PackageName
} else if metadata, ok := inventory.Metadata.(*dpkg.Metadata); ok {
Expand Down
2 changes: 1 addition & 1 deletion pkg/osvscanner/osvscanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ func makeRequestWithMatcher(
invs = append(invs, pkgs.PackageInfo.OriginalInventory)
}

res, err := matcher.Match(context.Background(), invs)
res, err := matcher.MatchVulnerabilities(context.Background(), invs)
if err != nil {
// TODO: Handle error here
r.Errorf("error when retrieving vulns: %v", err)
Expand Down

0 comments on commit 61a3731

Please sign in to comment.