From b15b566b5ea7c97c9d3402f82a2c7c317ce1037c Mon Sep 17 00:00:00 2001 From: Rex P <106129829+another-rex@users.noreply.github.com> Date: Fri, 1 Nov 2024 15:18:33 +1100 Subject: [PATCH] feat: Use lockfile scalibr interface (#1330) This PR contains all the code required to move to osv-scalibr while making the existing code compile and pass all tests (container tests not passing because of a bug in the scalibr alpine extractor). Changes not mentioned in the following list will be split off in separate PRs which should land before this PR. Those are: - [x] #1337 - [x] #1331 - [x] #1338 - [x] #1341 - [x] #1345 Changes in this PR: - Fixture changes: - Scalibr Python requirements.txt extractor currently doesn't support packages without versions, so added some version strings to the test files - Image package required quite a bit of reworking to successfully update. - Add the ability to iterate through a directory via the pathtree library - Support scalibr FS interface for Layers - Add conversion code to convert inventories from osv-scalibr back to v1's lockfile and Inventory - This is done to minimize snapshot changes. Followup PRs should remove this conversion - Add `internal/lockfilescalibr` package: - `errors.go` adds common extraction errors we want to translate. - `translation.go` adds helper functions and translation logic between osv-scanner v1 extractor names, and osv-scalibr extractor names. Changes in followup PRs: - Delete lockfiles package and migrate everything to use osv-scalibr extractors - Remove conversion code in image --------- Signed-off-by: dependabot[bot] Co-authored-by: Gareth Jones Co-authored-by: Xueqin Cui <72771658+cuixq@users.noreply.github.com> Co-authored-by: Michael Kedar Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- cmd/osv-scanner/__snapshots__/main_test.snap | 36 ++-- .../locks-requirements/my-requirements.txt | 2 +- .../locks-requirements/requirements-dev.txt | 2 +- .../locks-requirements/requirements.txt | 4 +- .../the_requirements_for_test.txt | 2 +- cmd/osv-scanner/main_test.go | 7 +- go.mod | 2 +- go.sum | 4 +- internal/image/__snapshots__/image_test.snap | 32 +-- internal/image/extractor.go | 153 ++++++-------- ...ine-release => alpine-3.18-alpine-release} | 0 .../image/fixtures/alpine-3.18-os-release | 7 + .../image/fixtures/test-alpine.Dockerfile | 5 +- .../test-node_modules-npm-empty.Dockerfile | 2 +- .../test-node_modules-npm-full.Dockerfile | 2 +- .../test-node_modules-pnpm-empty.Dockerfile | 2 +- .../test-node_modules-pnpm-full.Dockerfile | 2 +- .../test-node_modules-yarn-empty.Dockerfile | 2 +- .../test-node_modules-yarn-full.Dockerfile | 2 +- internal/image/image.go | 2 +- internal/image/image_test.go | 5 - internal/image/layer.go | 111 ++++++++++- internal/image/pathtree/pathtree_test.go | 4 +- internal/image/scan.go | 115 ++++++++++- internal/lockfilescalibr/errors.go | 9 + .../language/java/pomxmlnet/extractor.go | 6 +- .../language/java/pomxmlnet/extractor_test.go | 1 - .../javascript/nodemodules/extractor.go | 4 +- .../language/osv/osvscannerjson/extractor.go | 11 +- internal/lockfilescalibr/translation.go | 188 ++++++++++++++++++ internal/lockfilescalibr/translation_test.go | 23 +++ pkg/osvscanner/osvscanner.go | 144 ++++++++------ 32 files changed, 660 insertions(+), 231 deletions(-) rename internal/image/fixtures/{alpine-3.19-alpine-release => alpine-3.18-alpine-release} (100%) create mode 100644 internal/image/fixtures/alpine-3.18-os-release create mode 100644 internal/lockfilescalibr/errors.go create mode 100644 internal/lockfilescalibr/translation.go create mode 100644 internal/lockfilescalibr/translation_test.go diff --git a/cmd/osv-scanner/__snapshots__/main_test.snap b/cmd/osv-scanner/__snapshots__/main_test.snap index 40d4ab783c..6e7937416b 100755 --- a/cmd/osv-scanner/__snapshots__/main_test.snap +++ b/cmd/osv-scanner/__snapshots__/main_test.snap @@ -349,9 +349,9 @@ overriding license for package Packagist/league/flysystem/1.0.8 with 0BSD | LICENSE VIOLATION | ECOSYSTEM | PACKAGE | VERSION | SOURCE | +-------------------+-----------+------------------------------------------------+---------+-------------------------------------------------------+ | 0BSD | Packagist | league/flysystem | 1.0.8 | fixtures/locks-insecure/composer.lock | -| UNKNOWN | | https://github.com/flutter/buildroot.git | | fixtures/locks-insecure/osv-scanner-flutter-deps.json | -| UNKNOWN | | https://github.com/brendan-duncan/archive.git | | fixtures/locks-insecure/osv-scanner-flutter-deps.json | | UNKNOWN | | https://chromium.googlesource.com/chromium/src | | fixtures/locks-insecure/osv-scanner-flutter-deps.json | +| UNKNOWN | | https://github.com/brendan-duncan/archive.git | | fixtures/locks-insecure/osv-scanner-flutter-deps.json | +| UNKNOWN | | https://github.com/flutter/buildroot.git | | fixtures/locks-insecure/osv-scanner-flutter-deps.json | | UNKNOWN | RubyGems | ast | 2.4.2 | fixtures/locks-many/Gemfile.lock | | 0BSD | Packagist | sentry/sdk | 2.0.4 | fixtures/locks-many/composer.lock | +-------------------+-----------+------------------------------------------------+---------+-------------------------------------------------------+ @@ -1856,6 +1856,7 @@ Filtered 16 vulnerabilities from output | https://osv.dev/DLA-3325-1 | | Debian | openssl | 1.1.0l-1~deb9u5 | fixtures/sbom-insecure/postgres-stretch.cdx.xml | | https://osv.dev/DLA-3449-1 | | Debian | openssl | 1.1.0l-1~deb9u5 | fixtures/sbom-insecure/postgres-stretch.cdx.xml | | https://osv.dev/DLA-3530-1 | | Debian | openssl | 1.1.0l-1~deb9u5 | fixtures/sbom-insecure/postgres-stretch.cdx.xml | +| https://osv.dev/DLA-3942-1 | | Debian | openssl | 1.1.0l-1~deb9u5 | fixtures/sbom-insecure/postgres-stretch.cdx.xml | | https://osv.dev/DSA-4539-3 | | Debian | openssl | 1.1.0l-1~deb9u5 | fixtures/sbom-insecure/postgres-stretch.cdx.xml | | https://osv.dev/CVE-2017-12837 | 7.5 | Debian | perl | 5.24.1-3+deb9u7 | fixtures/sbom-insecure/postgres-stretch.cdx.xml | | https://osv.dev/CVE-2017-12883 | 9.1 | Debian | perl | 5.24.1-3+deb9u7 | fixtures/sbom-insecure/postgres-stretch.cdx.xml | @@ -2034,6 +2035,7 @@ Filtered 16 vulnerabilities from output | https://osv.dev/DLA-3325-1 | | Debian | openssl | 1.1.0l-1~deb9u5 | fixtures/sbom-insecure/postgres-stretch.cdx.xml | | https://osv.dev/DLA-3449-1 | | Debian | openssl | 1.1.0l-1~deb9u5 | fixtures/sbom-insecure/postgres-stretch.cdx.xml | | https://osv.dev/DLA-3530-1 | | Debian | openssl | 1.1.0l-1~deb9u5 | fixtures/sbom-insecure/postgres-stretch.cdx.xml | +| https://osv.dev/DLA-3942-1 | | Debian | openssl | 1.1.0l-1~deb9u5 | fixtures/sbom-insecure/postgres-stretch.cdx.xml | | https://osv.dev/DSA-4539-3 | | Debian | openssl | 1.1.0l-1~deb9u5 | fixtures/sbom-insecure/postgres-stretch.cdx.xml | | https://osv.dev/CVE-2017-12837 | 7.5 | Debian | perl | 5.24.1-3+deb9u7 | fixtures/sbom-insecure/postgres-stretch.cdx.xml | | https://osv.dev/CVE-2017-12883 | 9.1 | Debian | perl | 5.24.1-3+deb9u7 | fixtures/sbom-insecure/postgres-stretch.cdx.xml | @@ -2328,7 +2330,7 @@ No issues found --- [TestRun_LockfileWithExplicitParseAs/empty_works_as_an_escape_(no_fixture_because_it's_not_valid_on_Windows) - 2] -open /path/to/my:file: no such file or directory +stat /path/to/my:file: no such file or directory --- @@ -2337,7 +2339,7 @@ open /path/to/my:file: no such file or directory --- [TestRun_LockfileWithExplicitParseAs/empty_works_as_an_escape_(no_fixture_because_it's_not_valid_on_Windows)#01 - 2] -open /path/to/my:project/package-lock.json: no such file or directory +stat /path/to/my:project/package-lock.json: no such file or directory --- @@ -2346,7 +2348,7 @@ open /path/to/my:project/package-lock.json: no such file or directory --- [TestRun_LockfileWithExplicitParseAs/files_that_error_on_parsing_stop_parsable_files_from_being_checked - 2] -(extracting as Cargo.lock) could not extract from /fixtures/locks-insecure/my-package-lock.json: toml: line 1: expected '.' or '=', but got '{' instead +(extracting as rust/Cargolock) could not extract from /fixtures/locks-insecure/my-package-lock.json: toml: line 1: expected '.' or '=', but got '{' instead --- @@ -2404,7 +2406,7 @@ No issues found --- [TestRun_LockfileWithExplicitParseAs/parse-as_takes_priority,_even_if_it's_wrong - 2] -(extracting as package-lock.json) could not extract from /fixtures/locks-many/yarn.lock: invalid character '#' looking for beginning of value +(extracting as javascript/packagelockjson) could not extract from "/fixtures/locks-many/yarn.lock": invalid character '#' looking for beginning of value --- @@ -2586,17 +2588,17 @@ Scanning image ../../internal/image/fixtures/test-node_modules-npm-empty.tar [TestRun_OCIImage/scanning_node_modules_using_npm_with_some_packages - 1] Scanning image ../../internal/image/fixtures/test-node_modules-npm-full.tar -+-------------------------------------+------+--------------+----------+------------+-------------------------------------------------------------------------------------------------------+ -| OSV URL | CVSS | ECOSYSTEM | PACKAGE | VERSION | SOURCE | -+-------------------------------------+------+--------------+----------+------------+-------------------------------------------------------------------------------------------------------+ -| https://osv.dev/CVE-2023-42363 | 5.5 | Alpine:v3.19 | busybox | 1.36.1-r15 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/lib/apk/db/installed | -| https://osv.dev/CVE-2023-42364 | 5.5 | Alpine:v3.19 | busybox | 1.36.1-r15 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/lib/apk/db/installed | -| https://osv.dev/CVE-2023-42365 | 5.5 | Alpine:v3.19 | busybox | 1.36.1-r15 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/lib/apk/db/installed | -| https://osv.dev/CVE-2023-42366 | 5.5 | Alpine:v3.19 | busybox | 1.36.1-r15 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/lib/apk/db/installed | -| https://osv.dev/GHSA-38f5-ghc2-fcmv | 9.8 | npm | cryo | 0.0.6 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/usr/app/node_modules/.package-lock.json | -| https://osv.dev/GHSA-vh95-rmgr-6w4m | 9.8 | npm | minimist | 0.0.8 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/usr/app/node_modules/.package-lock.json | -| https://osv.dev/GHSA-xvch-5gv4-984h | | | | | | -+-------------------------------------+------+--------------+----------+------------+-------------------------------------------------------------------------------------------------------+ ++-------------------------------------+------+--------------+----------+------------+--------------------------------------------------------------------------------------------------------+ +| OSV URL | CVSS | ECOSYSTEM | PACKAGE | VERSION | SOURCE | ++-------------------------------------+------+--------------+----------+------------+--------------------------------------------------------------------------------------------------------+ +| https://osv.dev/CVE-2023-42363 | 5.5 | Alpine:v3.19 | busybox | 1.36.1-r15 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/lib/apk/db/installed | +| https://osv.dev/CVE-2023-42364 | 5.5 | Alpine:v3.19 | busybox | 1.36.1-r15 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/lib/apk/db/installed | +| https://osv.dev/CVE-2023-42365 | 5.5 | Alpine:v3.19 | busybox | 1.36.1-r15 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/lib/apk/db/installed | +| https://osv.dev/CVE-2023-42366 | 5.5 | Alpine:v3.19 | busybox | 1.36.1-r15 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/lib/apk/db/installed | +| https://osv.dev/GHSA-38f5-ghc2-fcmv | 9.8 | npm | cryo | 0.0.6 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/prod/app/node_modules/.package-lock.json | +| https://osv.dev/GHSA-vh95-rmgr-6w4m | 9.8 | npm | minimist | 0.0.8 | ../../internal/image/fixtures/test-node_modules-npm-full.tar:/prod/app/node_modules/.package-lock.json | +| https://osv.dev/GHSA-xvch-5gv4-984h | | | | | | ++-------------------------------------+------+--------------+----------+------------+--------------------------------------------------------------------------------------------------------+ --- diff --git a/cmd/osv-scanner/fixtures/locks-requirements/my-requirements.txt b/cmd/osv-scanner/fixtures/locks-requirements/my-requirements.txt index 7e1060246f..0e463a4d02 100644 --- a/cmd/osv-scanner/fixtures/locks-requirements/my-requirements.txt +++ b/cmd/osv-scanner/fixtures/locks-requirements/my-requirements.txt @@ -1 +1 @@ -flask +flask==1.0.0 diff --git a/cmd/osv-scanner/fixtures/locks-requirements/requirements-dev.txt b/cmd/osv-scanner/fixtures/locks-requirements/requirements-dev.txt index 7e66a17d49..4fae28300e 100644 --- a/cmd/osv-scanner/fixtures/locks-requirements/requirements-dev.txt +++ b/cmd/osv-scanner/fixtures/locks-requirements/requirements-dev.txt @@ -1 +1 @@ -black +black==1.0.0 diff --git a/cmd/osv-scanner/fixtures/locks-requirements/requirements.txt b/cmd/osv-scanner/fixtures/locks-requirements/requirements.txt index d0dae5a60f..911f55bcf9 100644 --- a/cmd/osv-scanner/fixtures/locks-requirements/requirements.txt +++ b/cmd/osv-scanner/fixtures/locks-requirements/requirements.txt @@ -1,3 +1,3 @@ -flask -flask-cors +flask==1.0.0 +flask-cors==1.0.0 pandas==0.23.4 diff --git a/cmd/osv-scanner/fixtures/locks-requirements/the_requirements_for_test.txt b/cmd/osv-scanner/fixtures/locks-requirements/the_requirements_for_test.txt index e079f8a603..35663c020e 100644 --- a/cmd/osv-scanner/fixtures/locks-requirements/the_requirements_for_test.txt +++ b/cmd/osv-scanner/fixtures/locks-requirements/the_requirements_for_test.txt @@ -1 +1 @@ -pytest +pytest==1.0.0 diff --git a/cmd/osv-scanner/main_test.go b/cmd/osv-scanner/main_test.go index 58530d87c9..26466c7486 100644 --- a/cmd/osv-scanner/main_test.go +++ b/cmd/osv-scanner/main_test.go @@ -518,7 +518,12 @@ func TestRun_LockfileWithExplicitParseAs(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() - testCli(t, tt) + stdout, stderr := runCli(t, tt) + + testutility.NewSnapshot().MatchText(t, stdout) + testutility.NewSnapshot().WithWindowsReplacements(map[string]string{ + "CreateFile": "stat", + }).MatchText(t, stderr) }) } } diff --git a/go.mod b/go.mod index ff597a5a8a..190c3fc96e 100644 --- a/go.mod +++ b/go.mod @@ -18,7 +18,7 @@ require ( github.com/go-git/go-git/v5 v5.12.0 github.com/google/go-cmp v0.6.0 github.com/google/go-containerregistry v0.20.2 - github.com/google/osv-scalibr v0.1.4-0.20241016092100-7e7f0c6a01ec + github.com/google/osv-scalibr v0.1.4-0.20241031120023-761ca671aacb github.com/ianlancetaylor/demangle v0.0.0-20240912202439-0a2b6291aafd github.com/jedib0t/go-pretty/v6 v6.6.0 github.com/muesli/reflow v0.3.0 diff --git a/go.sum b/go.sum index 3c6e3f5db2..36bfaa2ada 100644 --- a/go.sum +++ b/go.sum @@ -109,8 +109,8 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-containerregistry v0.20.2 h1:B1wPJ1SN/S7pB+ZAimcciVD+r+yV/l/DSArMxlbwseo= github.com/google/go-containerregistry v0.20.2/go.mod h1:z38EKdKh4h7IP2gSfUUqEvalZBqs6AoLeWfUy34nQC8= -github.com/google/osv-scalibr v0.1.4-0.20241016092100-7e7f0c6a01ec h1:pbByndoAmqND/Vkj3wYLS2aDAq+/2dll7rKzIM3ezCU= -github.com/google/osv-scalibr v0.1.4-0.20241016092100-7e7f0c6a01ec/go.mod h1:MbEYB+PKqEGjwMdpcoO5DWpi0+57jYgYcw2jlRy8O9Q= +github.com/google/osv-scalibr v0.1.4-0.20241031120023-761ca671aacb h1:A7IvUJk8r3wMuuAMWxwbkE3WBp+oF/v7CcEt3nCy+lI= +github.com/google/osv-scalibr v0.1.4-0.20241031120023-761ca671aacb/go.mod h1:MbEYB+PKqEGjwMdpcoO5DWpi0+57jYgYcw2jlRy8O9Q= github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8= github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= diff --git a/internal/image/__snapshots__/image_test.snap b/internal/image/__snapshots__/image_test.snap index 9d957ad396..58b8b54897 100755 --- a/internal/image/__snapshots__/image_test.snap +++ b/internal/image/__snapshots__/image_test.snap @@ -4,7 +4,7 @@ "Lockfiles": [ { "filePath": "/lib/apk/db/installed", - "parsedAs": "apk-installed", + "parsedAs": "os/apk", "packages": [ { "name": "alpine-baselayout", @@ -186,7 +186,7 @@ "Lockfiles": [ { "filePath": "/go/bin/more-vuln-overwrite-less-vuln", - "parsedAs": "go-binary", + "parsedAs": "go/binary", "packages": [ { "name": "github.com/BurntSushi/toml", @@ -214,7 +214,7 @@ }, { "filePath": "/go/bin/ptf-1.2.0", - "parsedAs": "go-binary", + "parsedAs": "go/binary", "packages": [ { "name": "github.com/BurntSushi/toml", @@ -242,7 +242,7 @@ }, { "filePath": "/go/bin/ptf-1.3.0", - "parsedAs": "go-binary", + "parsedAs": "go/binary", "packages": [ { "name": "github.com/BurntSushi/toml", @@ -270,7 +270,7 @@ }, { "filePath": "/go/bin/ptf-1.3.0-moved", - "parsedAs": "go-binary", + "parsedAs": "go/binary", "packages": [ { "name": "github.com/BurntSushi/toml", @@ -298,7 +298,7 @@ }, { "filePath": "/go/bin/ptf-1.4.0", - "parsedAs": "go-binary", + "parsedAs": "go/binary", "packages": [ { "name": "github.com/BurntSushi/toml", @@ -326,7 +326,7 @@ }, { "filePath": "/go/bin/ptf-vulnerable", - "parsedAs": "go-binary", + "parsedAs": "go/binary", "packages": [ { "name": "github.com/BurntSushi/toml", @@ -354,7 +354,7 @@ }, { "filePath": "/lib/apk/db/installed", - "parsedAs": "apk-installed", + "parsedAs": "os/apk", "packages": [ { "name": "alpine-baselayout", @@ -536,7 +536,7 @@ "Lockfiles": [ { "filePath": "/lib/apk/db/installed", - "parsedAs": "apk-installed", + "parsedAs": "os/apk", "packages": [ { "name": "alpine-baselayout", @@ -754,7 +754,7 @@ "Lockfiles": [ { "filePath": "/lib/apk/db/installed", - "parsedAs": "apk-installed", + "parsedAs": "os/apk", "packages": [ { "name": "alpine-baselayout", @@ -963,8 +963,8 @@ ] }, { - "filePath": "/usr/app/node_modules/.package-lock.json", - "parsedAs": "node_modules", + "filePath": "/prod/app/node_modules/.package-lock.json", + "parsedAs": "javascript/nodemodules", "packages": [ { "name": "cryo", @@ -1011,7 +1011,7 @@ "Lockfiles": [ { "filePath": "/lib/apk/db/installed", - "parsedAs": "apk-installed", + "parsedAs": "os/apk", "packages": [ { "name": "alpine-baselayout", @@ -1229,7 +1229,7 @@ "Lockfiles": [ { "filePath": "/lib/apk/db/installed", - "parsedAs": "apk-installed", + "parsedAs": "os/apk", "packages": [ { "name": "alpine-baselayout", @@ -1447,7 +1447,7 @@ "Lockfiles": [ { "filePath": "/lib/apk/db/installed", - "parsedAs": "apk-installed", + "parsedAs": "os/apk", "packages": [ { "name": "alpine-baselayout", @@ -1665,7 +1665,7 @@ "Lockfiles": [ { "filePath": "/lib/apk/db/installed", - "parsedAs": "apk-installed", + "parsedAs": "os/apk", "packages": [ { "name": "alpine-baselayout", diff --git a/internal/image/extractor.go b/internal/image/extractor.go index 6ddb7f9f16..18dad0ed63 100644 --- a/internal/image/extractor.go +++ b/internal/image/extractor.go @@ -1,57 +1,79 @@ package image import ( + "context" "errors" "fmt" - "os" - "path" - "sort" - + "io/fs" + "strings" + + "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/filesystem" + "github.com/google/osv-scalibr/extractor/filesystem/language/golang/gobinary" + "github.com/google/osv-scalibr/extractor/filesystem/os/apk" + "github.com/google/osv-scalibr/extractor/filesystem/os/dpkg" + "github.com/google/osv-scanner/internal/lockfilescalibr" + "github.com/google/osv-scanner/internal/lockfilescalibr/language/javascript/nodemodules" "github.com/google/osv-scanner/pkg/lockfile" ) // artifactExtractors contains only extractors for artifacts that are important in // the final layer of a container image -var artifactExtractors map[string]lockfile.Extractor = map[string]lockfile.Extractor{ - "node_modules": lockfile.NodeModulesExtractor{}, - "apk-installed": lockfile.ApkInstalledExtractor{}, - "dpkg": lockfile.DpkgStatusExtractor{}, - "go-binary": lockfile.GoBinaryExtractor{}, -} - -type extractorPair struct { - extractor lockfile.Extractor - name string +var artifactExtractors []filesystem.Extractor = []filesystem.Extractor{ + // TODO: Using nodemodules extractor to minimize changes of snapshots + // After annotations are added, we should switch to using packagejson. + // packagejson.New(packagejson.DefaultConfig()), + nodemodules.Extractor{}, + + apk.New(apk.DefaultConfig()), + gobinary.New(gobinary.DefaultConfig()), + // TODO: Add tests for debian containers + dpkg.New(dpkg.DefaultConfig()), } -func findArtifactExtractor(path string) []extractorPair { +func findArtifactExtractor(path string, fileInfo fs.FileInfo) []filesystem.Extractor { // Use ShouldExtract to collect and return a slice of artifactExtractors - var extractors []extractorPair - for name, extractor := range artifactExtractors { - if extractor.ShouldExtract(path) { - extractors = append(extractors, extractorPair{extractor, name}) + var extractors []filesystem.Extractor + for _, extractor := range artifactExtractors { + if extractor.FileRequired(path, fileInfo) { + extractors = append(extractors, extractor) } } return extractors } -func extractArtifactDeps(path string, layer *Layer) (lockfile.Lockfile, error) { - foundExtractors := findArtifactExtractor(path) +// Note: Output is non deterministic +func extractArtifactDeps(extractPath string, layer *Layer) ([]*extractor.Inventory, error) { + pathFileInfo, err := layer.Stat(extractPath) + if err != nil { + return nil, fmt.Errorf("attempted to get FileInfo but failed: %w", err) + } + + scalibrPath := strings.TrimPrefix(extractPath, "/") + foundExtractors := findArtifactExtractor(scalibrPath, pathFileInfo) if len(foundExtractors) == 0 { - return lockfile.Lockfile{}, fmt.Errorf("%w for %s", lockfile.ErrExtractorNotFound, path) + return nil, fmt.Errorf("%w for %s", lockfilescalibr.ErrExtractorNotFound, extractPath) } - packages := []lockfile.PackageDetails{} + inventories := []*extractor.Inventory{} var extractedAs string - for _, extPair := range foundExtractors { + for _, extractor := range foundExtractors { // File has to be reopened per extractor as each extractor moves the read cursor - f, err := OpenLayerFile(path, layer) + f, err := layer.Open(extractPath) if err != nil { - return lockfile.Lockfile{}, fmt.Errorf("attempted to open file but failed: %w", err) + return nil, fmt.Errorf("attempted to open file but failed: %w", err) + } + + scanInput := &filesystem.ScanInput{ + FS: layer, + Path: scalibrPath, + Root: "/", + Reader: f, + Info: pathFileInfo, } - newPackages, err := extPair.extractor.Extract(f) + newPackages, err := extractor.Extract(context.Background(), scanInput) f.Close() if err != nil { @@ -59,76 +81,33 @@ func extractArtifactDeps(path string, layer *Layer) (lockfile.Lockfile, error) { continue } - return lockfile.Lockfile{}, fmt.Errorf("(extracting as %s) %w", extPair.name, err) + return nil, fmt.Errorf("(extracting as %s) %w", extractor.Name(), err) } - extractedAs = extPair.name - packages = newPackages - // TODO(rexpan): Determine if it's acceptable to have multiple extractors + for i := range newPackages { + newPackages[i].Extractor = extractor + } + + extractedAs = extractor.Name() + inventories = newPackages + // TODO(rexpan): Determine if this it's acceptable to have multiple extractors // extract from the same file successfully break } if extractedAs == "" { - return lockfile.Lockfile{}, fmt.Errorf("%w for %s", lockfile.ErrExtractorNotFound, path) + return nil, fmt.Errorf("%w for %s", lockfilescalibr.ErrExtractorNotFound, extractPath) } - // Sort to have deterministic output, and to match behavior of lockfile.extractDeps - sort.Slice(packages, func(i, j int) bool { - if packages[i].Name == packages[j].Name { - return packages[i].Version < packages[j].Version + // Perform any one-off translations here + for _, inv := range inventories { + // Scalibr uses go to indicate go compiler version + // We specifically cares about the stdlib version inside the package + // so convert the package name from go to stdlib + if inv.Ecosystem() == "Go" && inv.Name == "go" { + inv.Name = "stdlib" } - - return packages[i].Name < packages[j].Name - }) - - return lockfile.Lockfile{ - FilePath: path, - ParsedAs: extractedAs, - Packages: packages, - }, nil -} - -// A File represents a file that exists in an image -type File struct { - *os.File - - layer *Layer - path string -} - -func (f File) Open(openPath string) (lockfile.NestedDepFile, error) { - // use path instead of filepath, because container is always in Unix paths (for now) - if path.IsAbs(openPath) { - return OpenLayerFile(openPath, f.layer) - } - - absPath := path.Join(f.path, openPath) - - return OpenLayerFile(absPath, f.layer) -} - -func (f File) Path() string { - return f.path -} - -func OpenLayerFile(path string, layer *Layer) (File, error) { - fileNode, err := layer.getFileNode(path) - if err != nil { - return File{}, err } - file, err := fileNode.Open() - if err != nil { - return File{}, err - } - - return File{ - File: file, - path: path, - layer: layer, - }, nil + return inventories, nil } - -var _ lockfile.DepFile = File{} -var _ lockfile.NestedDepFile = File{} diff --git a/internal/image/fixtures/alpine-3.19-alpine-release b/internal/image/fixtures/alpine-3.18-alpine-release similarity index 100% rename from internal/image/fixtures/alpine-3.19-alpine-release rename to internal/image/fixtures/alpine-3.18-alpine-release diff --git a/internal/image/fixtures/alpine-3.18-os-release b/internal/image/fixtures/alpine-3.18-os-release new file mode 100644 index 0000000000..ffb92a8cd4 --- /dev/null +++ b/internal/image/fixtures/alpine-3.18-os-release @@ -0,0 +1,7 @@ +/ # cat /etc/os-release +NAME="Alpine Linux" +ID=alpine +VERSION_ID=3.18.1 +PRETTY_NAME="Alpine Linux v3.18" +HOME_URL="https://alpinelinux.org/" +BUG_REPORT_URL="https://gitlab.alpinelinux.org/alpine/aports/-/issues" diff --git a/internal/image/fixtures/test-alpine.Dockerfile b/internal/image/fixtures/test-alpine.Dockerfile index 5cf22e2812..d6aa79f1c8 100644 --- a/internal/image/fixtures/test-alpine.Dockerfile +++ b/internal/image/fixtures/test-alpine.Dockerfile @@ -1,4 +1,5 @@ FROM alpine:3.10@sha256:451eee8bedcb2f029756dc3e9d73bab0e7943c1ac55cff3a4861c52a0fdd3e98 -# Switch the version to 3.19 to show the advisories published for the latest alpine versions -COPY "alpine-3.19-alpine-release" "/etc/alpine-release" +# Switch the version to 3.18 to show the advisories published for the latest alpine versions +COPY "alpine-3.18-alpine-release" "/etc/alpine-release" +COPY "alpine-3.18-os-release" "/etc/os-release" diff --git a/internal/image/fixtures/test-node_modules-npm-empty.Dockerfile b/internal/image/fixtures/test-node_modules-npm-empty.Dockerfile index aa559ba285..67ff3b79f7 100644 --- a/internal/image/fixtures/test-node_modules-npm-empty.Dockerfile +++ b/internal/image/fixtures/test-node_modules-npm-empty.Dockerfile @@ -2,7 +2,7 @@ ARG MANAGER_VERSION="10.2.4" FROM node:20-alpine@sha256:c0a3badbd8a0a760de903e00cedbca94588e609299820557e72cba2a53dbaa2c -WORKDIR /usr/app +WORKDIR /prod/app # install the desired package manager RUN npm i -g "npm@$MANAGER_VERSION" diff --git a/internal/image/fixtures/test-node_modules-npm-full.Dockerfile b/internal/image/fixtures/test-node_modules-npm-full.Dockerfile index df412b7a12..96e136b5f7 100644 --- a/internal/image/fixtures/test-node_modules-npm-full.Dockerfile +++ b/internal/image/fixtures/test-node_modules-npm-full.Dockerfile @@ -2,7 +2,7 @@ ARG MANAGER_VERSION="10.2.4" FROM node:20-alpine@sha256:c0a3badbd8a0a760de903e00cedbca94588e609299820557e72cba2a53dbaa2c -WORKDIR /usr/app +WORKDIR /prod/app # install the desired package manager RUN npm i -g "npm@$MANAGER_VERSION" diff --git a/internal/image/fixtures/test-node_modules-pnpm-empty.Dockerfile b/internal/image/fixtures/test-node_modules-pnpm-empty.Dockerfile index 8912eef5d0..7a221ca7ea 100644 --- a/internal/image/fixtures/test-node_modules-pnpm-empty.Dockerfile +++ b/internal/image/fixtures/test-node_modules-pnpm-empty.Dockerfile @@ -2,7 +2,7 @@ ARG MANAGER_VERSION="8.15.4" FROM node:20-alpine@sha256:c0a3badbd8a0a760de903e00cedbca94588e609299820557e72cba2a53dbaa2c -WORKDIR /usr/app +WORKDIR /prod/app # install the desired package manager RUN npm i -g "pnpm@$MANAGER_VERSION" diff --git a/internal/image/fixtures/test-node_modules-pnpm-full.Dockerfile b/internal/image/fixtures/test-node_modules-pnpm-full.Dockerfile index 97a37c652a..80e1ee6519 100644 --- a/internal/image/fixtures/test-node_modules-pnpm-full.Dockerfile +++ b/internal/image/fixtures/test-node_modules-pnpm-full.Dockerfile @@ -2,7 +2,7 @@ ARG MANAGER_VERSION="8.15.4" FROM node:20-alpine@sha256:c0a3badbd8a0a760de903e00cedbca94588e609299820557e72cba2a53dbaa2c -WORKDIR /usr/app +WORKDIR /prod/app # install the desired package manager RUN npm i -g "pnpm@$MANAGER_VERSION" diff --git a/internal/image/fixtures/test-node_modules-yarn-empty.Dockerfile b/internal/image/fixtures/test-node_modules-yarn-empty.Dockerfile index 7158d5d258..41f4c2f423 100644 --- a/internal/image/fixtures/test-node_modules-yarn-empty.Dockerfile +++ b/internal/image/fixtures/test-node_modules-yarn-empty.Dockerfile @@ -2,7 +2,7 @@ ARG MANAGER_VERSION="1.22.22" FROM node:20-alpine@sha256:c0a3badbd8a0a760de903e00cedbca94588e609299820557e72cba2a53dbaa2c -WORKDIR /usr/app +WORKDIR /prod/app # install the desired package manager RUN npm i -g "yarn@$MANAGER_VERSION" --force diff --git a/internal/image/fixtures/test-node_modules-yarn-full.Dockerfile b/internal/image/fixtures/test-node_modules-yarn-full.Dockerfile index 54889d6804..99e9653f01 100644 --- a/internal/image/fixtures/test-node_modules-yarn-full.Dockerfile +++ b/internal/image/fixtures/test-node_modules-yarn-full.Dockerfile @@ -2,7 +2,7 @@ ARG MANAGER_VERSION="1.22.22" FROM node:20-alpine@sha256:c0a3badbd8a0a760de903e00cedbca94588e609299820557e72cba2a53dbaa2c -WORKDIR /usr/app +WORKDIR /prod/app # install the desired package manager RUN npm i -g "yarn@$MANAGER_VERSION" --force diff --git a/internal/image/image.go b/internal/image/image.go index 152ed1356c..0be6f53bf2 100644 --- a/internal/image/image.go +++ b/internal/image/image.go @@ -264,7 +264,7 @@ func inWhiteoutDir(fileMap Layer, filePath string) bool { if filePath == "" { break } - dirname := filepath.Dir(filePath) + dirname := path.Dir(filePath) if filePath == dirname { break } diff --git a/internal/image/image_test.go b/internal/image/image_test.go index 90bd028524..bc4397ab4e 100644 --- a/internal/image/image_test.go +++ b/internal/image/image_test.go @@ -3,7 +3,6 @@ package image_test import ( "errors" "os" - "sort" "testing" "github.com/google/osv-scanner/internal/image" @@ -94,10 +93,6 @@ func TestScanImage(t *testing.T) { } } - sort.Slice(got.Lockfiles, func(i, j int) bool { - return got.Lockfiles[i].FilePath < got.Lockfiles[j].FilePath - }) - tt.want.MatchJSON(t, got) }) } diff --git a/internal/image/layer.go b/internal/image/layer.go index fce43e0f1e..eb66a37752 100644 --- a/internal/image/layer.go +++ b/internal/image/layer.go @@ -3,6 +3,11 @@ package image import ( "io/fs" "os" + "strings" + "time" + + // Note that paths accessing the disk must use filepath, but all virtual paths should use path + "path" "path/filepath" "github.com/google/osv-scanner/internal/image/pathtree" @@ -26,6 +31,69 @@ type FileNode struct { permission fs.FileMode } +var _ fs.DirEntry = FileNode{} + +func (f FileNode) IsDir() bool { + return f.fileType == Dir +} + +func (f FileNode) Name() string { + return path.Base(f.virtualPath) +} + +func (f FileNode) Type() fs.FileMode { + return f.permission +} + +func (f FileNode) Info() (fs.FileInfo, error) { + return f.Stat() +} + +type FileNodeFileInfo struct { + baseFileInfo fs.FileInfo + fileNode *FileNode +} + +var _ fs.FileInfo = FileNodeFileInfo{} + +func (f FileNodeFileInfo) Name() string { + return path.Base(f.fileNode.virtualPath) +} + +func (f FileNodeFileInfo) Size() int64 { + return f.baseFileInfo.Size() +} + +func (f FileNodeFileInfo) Mode() fs.FileMode { + return f.fileNode.permission +} + +func (f FileNodeFileInfo) ModTime() time.Time { + return f.baseFileInfo.ModTime() +} + +func (f FileNodeFileInfo) IsDir() bool { + return f.fileNode.fileType == Dir +} + +func (f FileNodeFileInfo) Sys() any { + return nil +} + +// Stat returns the FileInfo structure describing file. +func (f *FileNode) Stat() (fs.FileInfo, error) { + baseFileInfo, err := os.Stat(f.absoluteDiskPath()) + if err != nil { + return nil, err + } + + return FileNodeFileInfo{ + baseFileInfo: baseFileInfo, + fileNode: f, + }, nil +} + +// Open returns a file handle for the file func (f *FileNode) Open() (*os.File, error) { if f.isWhiteout { return nil, fs.ErrNotExist @@ -47,8 +115,47 @@ type Layer struct { // TODO: Use hashmap to speed up path lookups } -func (filemap Layer) getFileNode(path string) (*FileNode, error) { - node := filemap.fileNodeTrie.Get(path) +func (filemap Layer) Open(path string) (fs.File, error) { + node, err := filemap.getFileNode(path) + if err != nil { + return nil, err + } + + return node.Open() +} + +func (filemap Layer) Stat(path string) (fs.FileInfo, error) { + node, err := filemap.getFileNode(path) + if err != nil { + return nil, err + } + + return node.Stat() +} + +func (filemap Layer) ReadDir(path string) ([]fs.DirEntry, error) { + children := filemap.fileNodeTrie.GetChildren(path) + output := make([]fs.DirEntry, 0, len(children)) + for _, node := range children { + output = append(output, node) + } + + return output, nil +} + +var _ fs.FS = Layer{} +var _ fs.StatFS = Layer{} +var _ fs.ReadDirFS = Layer{} + +func (filemap Layer) getFileNode(nodePath string) (*FileNode, error) { + // We expect all paths queried to be absolute paths rooted at the container root + // However, scalibr uses paths without a prepending /, because the paths are relative to Root. + // Root will always be '/' for container scanning, so prepend with / if necessary. + if !strings.HasPrefix(nodePath, "/") { + nodePath = path.Join("/", nodePath) + } + + node := filemap.fileNodeTrie.Get(nodePath) if node == nil { return nil, fs.ErrNotExist } diff --git a/internal/image/pathtree/pathtree_test.go b/internal/image/pathtree/pathtree_test.go index 9224bd92e9..556c97545a 100644 --- a/internal/image/pathtree/pathtree_test.go +++ b/internal/image/pathtree/pathtree_test.go @@ -195,8 +195,8 @@ func TestNode_GetChildren(t *testing.T) { tt.want, got, cmp.AllowUnexported(testVal{}), - cmpopts.SortSlices(func(a, b string) bool { - return strings.Compare(a, b) < 0 + cmpopts.SortSlices(func(a, b *testVal) bool { + return strings.Compare(a.string, b.string) < 0 })); diff != "" { t.Errorf("Node.GetChildren() (-want +got): %v", diff) } diff --git a/internal/image/scan.go b/internal/image/scan.go index 9bfc8ae02d..ccbd398b57 100644 --- a/internal/image/scan.go +++ b/internal/image/scan.go @@ -1,14 +1,21 @@ package image import ( + "cmp" "errors" "fmt" "io/fs" "log" + "path" + "slices" + "strings" + "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scanner/internal/lockfilescalibr" "github.com/google/osv-scanner/pkg/lockfile" "github.com/google/osv-scanner/pkg/models" "github.com/google/osv-scanner/pkg/reporter" + "golang.org/x/exp/maps" ) // ScanImage scans an exported docker image .tar file @@ -22,33 +29,105 @@ func ScanImage(r reporter.Reporter, imagePath string) (ScanResults, error) { allFiles := img.LastLayer().AllFiles() - scannedLockfiles := ScanResults{ + scanResults := ScanResults{ ImagePath: imagePath, } + + inventories := []*extractor.Inventory{} + for _, file := range allFiles { if file.fileType != RegularFile { continue } - parsedLockfile, err := extractArtifactDeps(file.virtualPath, img.LastLayer()) + + // TODO: Currently osv-scalibr does not correctly annotate OS packages + // causing artifact extractors to double extract elements here. + // So let's skip all these directories for now. + // See (b/364536788) + // + // https://en.wikipedia.org/wiki/Filesystem_Hierarchy_Standard + // > Secondary hierarchy for read-only user data; contains the majority of (multi-)user utilities and applications. + // > Should be shareable and read-only. + // + if strings.HasPrefix(file.virtualPath, "/usr/") { + continue + } + + extractedInventories, err := extractArtifactDeps(file.virtualPath, img.LastLayer()) if err != nil { - if !errors.Is(err, lockfile.ErrExtractorNotFound) { + if !errors.Is(err, lockfilescalibr.ErrExtractorNotFound) { r.Errorf("Attempted to extract lockfile but failed: %s - %v\n", file.virtualPath, err) } continue } + inventories = append(inventories, extractedInventories...) + } + + // TODO: Remove the lockfile.Lockfile conversion + // Temporarily convert back to lockfile.Lockfiles to minimize snapshot changes + // This is done to verify the scanning behavior have not changed with this refactor + // and to minimize changes in the initial PR. + lockfiles := map[string]lockfile.Lockfile{} + for _, i := range inventories { + if len(i.Annotations) > 1 { + log.Printf("%v", i.Annotations) + } + lf, exists := lockfiles[path.Join("/", i.Locations[0])] + if !exists { + lf = lockfile.Lockfile{ + FilePath: path.Join("/", i.Locations[0]), + ParsedAs: i.Extractor.Name(), + } + } + + pkg := lockfile.PackageDetails{ + Name: i.Name, + Version: i.Version, + Ecosystem: lockfile.Ecosystem(i.Ecosystem()), + CompareAs: lockfile.Ecosystem(strings.Split(i.Ecosystem(), ":")[0]), + } + if i.SourceCode != nil { + pkg.Commit = i.SourceCode.Commit + } - scannedLockfiles.Lockfiles = append(scannedLockfiles.Lockfiles, parsedLockfile) + lf.Packages = append(lf.Packages, pkg) + + lockfiles[path.Join("/", i.Locations[0])] = lf + } + + for _, l := range lockfiles { + slices.SortFunc(l.Packages, func(a, b lockfile.PackageDetails) int { + return cmp.Or( + strings.Compare(a.Name, b.Name), + strings.Compare(a.Version, b.Version), + ) + }) } - traceOrigin(img, &scannedLockfiles) + scanResults.Lockfiles = maps.Values(lockfiles) + slices.SortFunc(scanResults.Lockfiles, func(a, b lockfile.Lockfile) int { + return strings.Compare(a.FilePath, b.FilePath) + }) + + traceOrigin(img, &scanResults) + + // TODO: Reenable this sort when removing lockfile.Lockfile + // Sort to have deterministic output, and to match behavior of lockfile.extractDeps + // slices.SortFunc(scanResults.Inventories, func(a, b *extractor.Inventory) int { + // // TODO: Should we consider errors here? + // aPURL, _ := a.Extractor.ToPURL(a) + // bPURL, _ := b.Extractor.ToPURL(b) + + // return strings.Compare(aPURL.ToString(), bPURL.ToString()) + // }) err = img.Cleanup() if err != nil { err = fmt.Errorf("failed to cleanup: %w", img.Cleanup()) } - return scannedLockfiles, err + return scanResults, err } // traceOrigin fills out the originLayerID for each package in ScanResults @@ -60,15 +139,30 @@ func traceOrigin(img *Image, scannedLockfiles *ScanResults) { Name string Version string Commit string - Ecosystem lockfile.Ecosystem + Ecosystem string } + // TODO: Remove this function after fully migrating to extractor.Inventory makePDKey := func(pd lockfile.PackageDetails) PDKey { return PDKey{ Name: pd.Name, Version: pd.Version, Commit: pd.Commit, - Ecosystem: pd.Ecosystem, + Ecosystem: string(pd.Ecosystem), + } + } + + makePDKey2 := func(pd *extractor.Inventory) PDKey { + var commit string + if pd.SourceCode != nil { + commit = pd.SourceCode.Commit + } + + return PDKey{ + Name: pd.Name, + Version: pd.Version, + Commit: commit, + Ecosystem: pd.Ecosystem(), } } @@ -120,12 +214,11 @@ func traceOrigin(img *Image, scannedLockfiles *ScanResults) { // Failed to parse an older version of file in image // Behave as if the file does not exist break - // log.Panicf("unimplemented! failed to parse an older version of file in image: %s@%s: %v", file.FilePath, oldFileNode.originLayer.id, err) } // For each package in the old version, check if it existed in the newer layer, if so, the origin must be this layer or earlier. - for _, pkg := range oldDeps.Packages { - key := makePDKey(pkg) + for _, pkg := range oldDeps { + key := makePDKey2(pkg) if val, ok := sourceLayerIdx[key]; ok && val == prevLayerIdx { sourceLayerIdx[key] = layerIdx } diff --git a/internal/lockfilescalibr/errors.go b/internal/lockfilescalibr/errors.go new file mode 100644 index 0000000000..005ee0012b --- /dev/null +++ b/internal/lockfilescalibr/errors.go @@ -0,0 +1,9 @@ +package lockfilescalibr + +import "errors" + +var ErrIncompatibleFileFormat = errors.New("file format is incompatible, but this is expected") +var ErrNotImplemented = errors.New("not implemented") +var ErrWrongExtractor = errors.New("this extractor did not create this inventory") +var ErrExtractorNotFound = errors.New("could not determine extractor") +var ErrNoExtractorsFound = errors.New("no extractors found to be suitable to this file") diff --git a/internal/lockfilescalibr/language/java/pomxmlnet/extractor.go b/internal/lockfilescalibr/language/java/pomxmlnet/extractor.go index 0fe2a502ed..3a1a5f51c0 100644 --- a/internal/lockfilescalibr/language/java/pomxmlnet/extractor.go +++ b/internal/lockfilescalibr/language/java/pomxmlnet/extractor.go @@ -169,16 +169,16 @@ func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) ([] } // ToPURL converts an inventory created by this extractor into a PURL. -func (e Extractor) ToPURL(i *extractor.Inventory) (*purl.PackageURL, error) { +func (e Extractor) ToPURL(i *extractor.Inventory) *purl.PackageURL { return &purl.PackageURL{ Type: purl.TypeMaven, Name: i.Name, Version: i.Version, - }, nil + } } // ToCPEs is not applicable as this extractor does not infer CPEs from the Inventory. -func (e Extractor) ToCPEs(_ *extractor.Inventory) ([]string, error) { return []string{}, nil } +func (e Extractor) ToCPEs(_ *extractor.Inventory) []string { return []string{} } // Ecosystem returns the OSV ecosystem ('npm') of the software extracted by this extractor. func (e Extractor) Ecosystem(_ *extractor.Inventory) string { diff --git a/internal/lockfilescalibr/language/java/pomxmlnet/extractor_test.go b/internal/lockfilescalibr/language/java/pomxmlnet/extractor_test.go index 5eb95ab9a7..556663be75 100644 --- a/internal/lockfilescalibr/language/java/pomxmlnet/extractor_test.go +++ b/internal/lockfilescalibr/language/java/pomxmlnet/extractor_test.go @@ -9,7 +9,6 @@ import ( "github.com/google/osv-scalibr/extractor" "github.com/google/osv-scalibr/extractor/filesystem/osv" "github.com/google/osv-scalibr/testing/extracttest" - "github.com/google/osv-scanner/internal/lockfilescalibr/language/java/pomxmlnet" "github.com/google/osv-scanner/internal/resolution/clienttest" "github.com/google/osv-scanner/internal/resolution/datasource" diff --git a/internal/lockfilescalibr/language/javascript/nodemodules/extractor.go b/internal/lockfilescalibr/language/javascript/nodemodules/extractor.go index 93bdb58c5d..a965b2fecd 100644 --- a/internal/lockfilescalibr/language/javascript/nodemodules/extractor.go +++ b/internal/lockfilescalibr/language/javascript/nodemodules/extractor.go @@ -40,12 +40,12 @@ func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) ([] } // ToPURL converts an inventory created by this extractor into a PURL. -func (e Extractor) ToPURL(i *extractor.Inventory) (*purl.PackageURL, error) { +func (e Extractor) ToPURL(i *extractor.Inventory) *purl.PackageURL { return e.actualExtractor.ToPURL(i) } // ToCPEs is not applicable as this extractor does not infer CPEs from the Inventory. -func (e Extractor) ToCPEs(i *extractor.Inventory) ([]string, error) { +func (e Extractor) ToCPEs(i *extractor.Inventory) []string { return e.actualExtractor.ToCPEs(i) } diff --git a/internal/lockfilescalibr/language/osv/osvscannerjson/extractor.go b/internal/lockfilescalibr/language/osv/osvscannerjson/extractor.go index e9f37de08b..27de9b2580 100644 --- a/internal/lockfilescalibr/language/osv/osvscannerjson/extractor.go +++ b/internal/lockfilescalibr/language/osv/osvscannerjson/extractor.go @@ -68,16 +68,13 @@ func (e Extractor) Extract(_ context.Context, input *filesystem.ScanInput) ([]*e } // ToPURL converts an inventory created by this extractor into a PURL. -func (e Extractor) ToPURL(i *extractor.Inventory) (*purl.PackageURL, error) { - return &purl.PackageURL{ - Type: purl.TypeNPM, - Name: i.Name, - Version: i.Version, - }, nil +func (e Extractor) ToPURL(_ *extractor.Inventory) *purl.PackageURL { + // TODO: support purl conversion + return nil } // ToCPEs is not applicable as this extractor does not infer CPEs from the Inventory. -func (e Extractor) ToCPEs(_ *extractor.Inventory) ([]string, error) { return []string{}, nil } +func (e Extractor) ToCPEs(_ *extractor.Inventory) []string { return []string{} } // Ecosystem returns the OSV ecosystem ('npm') of the software extracted by this extractor. func (e Extractor) Ecosystem(i *extractor.Inventory) string { diff --git a/internal/lockfilescalibr/translation.go b/internal/lockfilescalibr/translation.go new file mode 100644 index 0000000000..5cebcbf6a9 --- /dev/null +++ b/internal/lockfilescalibr/translation.go @@ -0,0 +1,188 @@ +package lockfilescalibr + +import ( + "context" + "fmt" + "io/fs" + "os" + "sort" + + "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/filesystem" + "github.com/google/osv-scalibr/extractor/filesystem/language/dart/pubspec" + "github.com/google/osv-scalibr/extractor/filesystem/language/dotnet/packageslockjson" + "github.com/google/osv-scalibr/extractor/filesystem/language/erlang/mixlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/golang/gomod" + "github.com/google/osv-scalibr/extractor/filesystem/language/java/gradlelockfile" + "github.com/google/osv-scalibr/extractor/filesystem/language/java/gradleverificationmetadataxml" + "github.com/google/osv-scalibr/extractor/filesystem/language/java/pomxml" + "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/packagelockjson" + "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/pnpmlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/yarnlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/php/composerlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/pdmlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/pipfilelock" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/poetrylock" + "github.com/google/osv-scalibr/extractor/filesystem/language/python/requirements" + "github.com/google/osv-scalibr/extractor/filesystem/language/r/renvlock" + "github.com/google/osv-scalibr/extractor/filesystem/language/ruby/gemfilelock" + "github.com/google/osv-scalibr/extractor/filesystem/language/rust/cargolock" + + scalibrfs "github.com/google/osv-scalibr/fs" +) + +var lockfileExtractors = []filesystem.Extractor{ + // conanlock.Extractor{}, + packageslockjson.Extractor{}, + mixlock.Extractor{}, + pubspec.Extractor{}, + gomod.Extractor{}, + pomxml.Extractor{}, + gradlelockfile.Extractor{}, + gradleverificationmetadataxml.Extractor{}, + packagelockjson.Extractor{}, + pnpmlock.Extractor{}, + yarnlock.Extractor{}, + composerlock.Extractor{}, + pipfilelock.Extractor{}, + pdmlock.Extractor{}, + poetrylock.Extractor{}, + requirements.Extractor{}, + renvlock.Extractor{}, + gemfilelock.Extractor{}, + cargolock.Extractor{}, +} + +var lockfileExtractorMapping = map[string]string{ + "pubspec.lock": "dart/pubspec", + "pnpm-lock.yaml": "javascript/pnpmlock", + "yarn.lock": "javascript/yarnlock", + "package-lock.json": "javascript/packagelockjson", + "pom.xml": "java/pomxml", + "buildscript-gradle.lockfile": "java/gradlelockfile", + "gradle.lockfile": "java/gradlelockfile", + "verification-metadata.xml": "java/gradleverificationmetadataxml", + "poetry.lock": "python/poetrylock", + "Pipfile.lock": "python/Pipfilelock", + "pdm.lock": "python/pdmlock", + "requirements.txt": "python/requirements", + "Cargo.lock": "rust/Cargolock", + "composer.lock": "php/composerlock", + "mix.lock": "erlang/mixlock", + "renv.lock": "r/renvlock", + "packages.lock.json": "dotnet/packageslockjson", + // "conan.lock": "cpp/conanlock", + "go.mod": "go/gomod", + "Gemfile.lock": "ruby/gemfilelock", +} + +// ExtractWithExtractor attempts to extract the file at the given path with the extractor passed in +func ExtractWithExtractor(ctx context.Context, localPath string, ext filesystem.Extractor) ([]*extractor.Inventory, error) { + info, err := os.Stat(localPath) + if err != nil { + return nil, err + } + + return extractWithExtractor(ctx, localPath, info, ext) +} + +// Extract attempts to extract the file at the given path +// +// Args: +// - localPath: the path to the lockfile +// - extractAs: the name of the lockfile format to extract as (Using OSV-Scanner V1 extractor names) +// +// Returns: +// - []*extractor.Inventory: the extracted lockfile data +// - error: any errors encountered during extraction +// +// If extractAs is not specified, then the function will attempt to +// identify the lockfile format based on the file name. +// +// If no extractors are found, then ErrNoExtractorsFound is returned. +func Extract(ctx context.Context, localPath string, extractAs string) ([]*extractor.Inventory, error) { + info, err := os.Stat(localPath) + if err != nil { + return nil, err + } + + if extractAs != "" { + return extractAsSpecific(ctx, extractAs, localPath, info) + } + + output := []*extractor.Inventory{} + extractorFound := false + + for _, ext := range lockfileExtractors { + if ext.FileRequired(localPath, info) { + extractorFound = true + + inv, err := extractWithExtractor(ctx, localPath, info, ext) + if err != nil { + return nil, err + } + + output = append(output, inv...) + } + } + + if !extractorFound { + return nil, ErrNoExtractorsFound + } + + sort.Slice(output, func(i, j int) bool { + if output[i].Name == output[j].Name { + return output[i].Version < output[j].Version + } + + return output[i].Name < output[j].Name + }) + + return output, nil +} + +// Use the extractor specified by extractAs string key +func extractAsSpecific(ctx context.Context, extractAs string, localPath string, info fs.FileInfo) ([]*extractor.Inventory, error) { + for _, ext := range lockfileExtractors { + if lockfileExtractorMapping[extractAs] == ext.Name() { + return extractWithExtractor(ctx, localPath, info, ext) + } + } + + return nil, fmt.Errorf("%w, requested %s", ErrExtractorNotFound, extractAs) +} + +func extractWithExtractor(ctx context.Context, localPath string, info fs.FileInfo, ext filesystem.Extractor) ([]*extractor.Inventory, error) { + si, err := createScanInput(localPath, info) + if err != nil { + return nil, err + } + + inv, err := ext.Extract(ctx, si) + if err != nil { + return nil, fmt.Errorf("(extracting as %s) %w", ext.Name(), err) + } + + for i := range inv { + inv[i].Extractor = ext + } + + return inv, nil +} + +func createScanInput(path string, fileInfo fs.FileInfo) (*filesystem.ScanInput, error) { + reader, err := os.Open(path) + if err != nil { + return nil, err + } + + si := filesystem.ScanInput{ + FS: os.DirFS("/").(scalibrfs.FS), + Path: path, + Root: "/", + Reader: reader, + Info: fileInfo, + } + + return &si, nil +} diff --git a/internal/lockfilescalibr/translation_test.go b/internal/lockfilescalibr/translation_test.go new file mode 100644 index 0000000000..14c5f72e1d --- /dev/null +++ b/internal/lockfilescalibr/translation_test.go @@ -0,0 +1,23 @@ +package lockfilescalibr + +import ( + "testing" +) + +func TestLockfileScalibrMappingExists(t *testing.T) { + t.Parallel() + + for _, target := range lockfileExtractorMapping { + found := false + for _, ext := range lockfileExtractors { + if target == ext.Name() { + found = true + break + } + } + + if !found { + t.Errorf("Extractor %v not found.", target) + } + } +} diff --git a/pkg/osvscanner/osvscanner.go b/pkg/osvscanner/osvscanner.go index 68f0394513..9e74a64739 100644 --- a/pkg/osvscanner/osvscanner.go +++ b/pkg/osvscanner/osvscanner.go @@ -2,6 +2,8 @@ package osvscanner import ( "bufio" + "cmp" + "context" "crypto/md5" //nolint:gosec "errors" "fmt" @@ -11,15 +13,21 @@ import ( "path" "path/filepath" "slices" - "sort" "strings" + "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/filesystem/os/apk" + "github.com/google/osv-scalibr/extractor/filesystem/os/dpkg" + scalibrosv "github.com/google/osv-scalibr/extractor/filesystem/osv" + "github.com/google/osv-scanner/internal/config" "github.com/google/osv-scanner/internal/customgitignore" "github.com/google/osv-scanner/internal/depsdev" "github.com/google/osv-scanner/internal/image" "github.com/google/osv-scanner/internal/local" - "github.com/google/osv-scanner/internal/manifest" + "github.com/google/osv-scanner/internal/lockfilescalibr" + "github.com/google/osv-scanner/internal/lockfilescalibr/language/java/pomxmlnet" + "github.com/google/osv-scanner/internal/lockfilescalibr/language/osv/osvscannerjson" "github.com/google/osv-scanner/internal/output" "github.com/google/osv-scanner/internal/resolution/client" "github.com/google/osv-scanner/internal/resolution/datasource" @@ -171,17 +179,19 @@ func scanDir(r reporter.Reporter, dir string, skipGit bool, recursive bool, useG } if !info.IsDir() { - if extractor, _ := lockfile.FindExtractor(path, ""); extractor != nil { - pkgs, err := scanLockfile(r, path, "", transitiveAct) - if err != nil { + pkgs, err := scanLockfile(r, path, "", transitiveAct) + if err != nil { + // If no extractors found then just continue + if !errors.Is(err, lockfilescalibr.ErrNoExtractorsFound) { r.Errorf("Attempted to scan lockfile but failed: %s\n", path) } - scannedPackages = append(scannedPackages, pkgs...) } + scannedPackages = append(scannedPackages, pkgs...) + // No need to check for error // If scan fails, it means it isn't a valid SBOM file, // so just move onto the next file - pkgs, _ := scanSBOMFile(r, path, true) + pkgs, _ = scanSBOMFile(r, path, true) scannedPackages = append(scannedPackages, pkgs...) } @@ -356,27 +366,29 @@ func scanImage(r reporter.Reporter, path string) ([]scannedPackage, error) { // within to `query` func scanLockfile(r reporter.Reporter, path string, parseAs string, transitiveAct TransitiveScanningActions) ([]scannedPackage, error) { var err error - var parsedLockfile lockfile.Lockfile - - f, err := lockfile.OpenLocalDepFile(path) - - if err == nil { - // special case for the APK and DPKG parsers because they have a very generic name while - // living at a specific location, so they are not included in the map of parsers - // used by lockfile.Parse to avoid false-positives when scanning projects - switch parseAs { - case "apk-installed": - parsedLockfile, err = lockfile.FromApkInstalled(path) - case "dpkg-status": - parsedLockfile, err = lockfile.FromDpkgStatus(path) - case "osv-scanner": - parsedLockfile, err = lockfile.FromOSVScannerResults(path) - default: - if !transitiveAct.Disabled && (parseAs == "pom.xml" || filepath.Base(path) == "pom.xml") { - parsedLockfile, err = extractMavenDeps(f, transitiveAct) - } else { - parsedLockfile, err = lockfile.ExtractDeps(f, parseAs) + + var inventories []*extractor.Inventory + + // special case for the APK and DPKG parsers because they have a very generic name while + // living at a specific location, so they are not included in the map of parsers + // used by lockfile.Parse to avoid false-positives when scanning projects + switch parseAs { + case "apk-installed": + inventories, err = lockfilescalibr.ExtractWithExtractor(context.Background(), path, apk.New(apk.DefaultConfig())) + case "dpkg-status": + inventories, err = lockfilescalibr.ExtractWithExtractor(context.Background(), path, dpkg.New(dpkg.DefaultConfig())) + case "osv-scanner": + inventories, err = lockfilescalibr.ExtractWithExtractor(context.Background(), path, osvscannerjson.Extractor{}) + default: + if !transitiveAct.Disabled && (parseAs == "pom.xml" || filepath.Base(path) == "pom.xml") { + ext, extErr := createMavenExtractor(transitiveAct) + if extErr != nil { + return nil, extErr } + + inventories, err = lockfilescalibr.ExtractWithExtractor(context.Background(), path, ext) + } else { + inventories, err = lockfilescalibr.Extract(context.Background(), path, parseAs) } } @@ -390,33 +402,57 @@ func scanLockfile(r reporter.Reporter, path string, parseAs string, transitiveAc parsedAsComment = fmt.Sprintf("as a %s ", parseAs) } + slices.SortFunc(inventories, func(i, j *extractor.Inventory) int { + return cmp.Or( + strings.Compare(i.Name, j.Name), + strings.Compare(i.Version, j.Version), + ) + }) + + pkgCount := len(inventories) + r.Infof( "Scanned %s file %sand found %d %s\n", path, parsedAsComment, - len(parsedLockfile.Packages), - output.Form(len(parsedLockfile.Packages), "package", "packages"), + pkgCount, + output.Form(pkgCount, "package", "packages"), ) - packages := make([]scannedPackage, len(parsedLockfile.Packages)) - for i, pkgDetail := range parsedLockfile.Packages { - packages[i] = scannedPackage{ - Name: pkgDetail.Name, - Version: pkgDetail.Version, - Commit: pkgDetail.Commit, - Ecosystem: pkgDetail.Ecosystem, - DepGroups: pkgDetail.DepGroups, + packages := make([]scannedPackage, 0, pkgCount) + + for _, inv := range inventories { + scannedPackage := scannedPackage{ + Name: inv.Name, + Version: inv.Version, Source: models.SourceInfo{ Path: path, Type: "lockfile", }, } + if inv.SourceCode != nil { + scannedPackage.Commit = inv.SourceCode.Commit + } + eco := inv.Ecosystem() + // TODO(rexpan): Refactor these minor patches to individual items + // TODO: Ecosystem should be pared with Enum : Suffix + if eco == "Alpine" { + eco = "Alpine:v3.20" + } + + scannedPackage.Ecosystem = lockfile.Ecosystem(eco) + + if dg, ok := inv.Metadata.(scalibrosv.DepGroups); ok { + scannedPackage.DepGroups = dg.DepGroups() + } + + packages = append(packages, scannedPackage) } return packages, nil } -func extractMavenDeps(f lockfile.DepFile, actions TransitiveScanningActions) (lockfile.Lockfile, error) { +func createMavenExtractor(actions TransitiveScanningActions) (*pomxmlnet.Extractor, error) { var depClient client.DependencyClient var err error if actions.NativeDataSource { @@ -425,37 +461,20 @@ func extractMavenDeps(f lockfile.DepFile, actions TransitiveScanningActions) (lo depClient, err = client.NewDepsDevClient(depsdev.DepsdevAPI) } if err != nil { - return lockfile.Lockfile{}, err + return nil, err } mavenClient, err := datasource.NewMavenRegistryAPIClient(actions.MavenRegistry) if err != nil { - return lockfile.Lockfile{}, err + return nil, err } - extractor := manifest.MavenResolverExtractor{ + extractor := pomxmlnet.Extractor{ DependencyClient: depClient, MavenRegistryAPIClient: mavenClient, } - packages, err := extractor.Extract(f) - if err != nil { - err = fmt.Errorf("failed extracting %s: %w", f.Path(), err) - } - - // Sort packages for testing convenience. - sort.Slice(packages, func(i, j int) bool { - if packages[i].Name == packages[j].Name { - return packages[i].Version < packages[j].Version - } - return packages[i].Name < packages[j].Name - }) - - return lockfile.Lockfile{ - FilePath: f.Path(), - ParsedAs: "pom.xml", - Packages: packages, - }, err + return &extractor, nil } // scanSBOMFile will load, identify, and parse the SBOM path passed in, and add the dependencies specified @@ -1055,7 +1074,12 @@ func filterIgnoredPackages(r reporter.Reporter, packages []scannedPackage, confi } if ignore, ignoreLine := configToUse.ShouldIgnorePackage(pkg); ignore { - pkgString := fmt.Sprintf("%s/%s/%s", p.Ecosystem, p.Name, p.Version) + var pkgString string + if p.PURL != "" { + pkgString = p.PURL + } else { + pkgString = fmt.Sprintf("%s/%s/%s", p.Ecosystem, p.Name, p.Version) + } reason := ignoreLine.Reason if reason == "" {