Skip to content

Commit

Permalink
feat: Add artifact extractors (#1531)
Browse files Browse the repository at this point in the history
- Adds the Python wheel/egg and Java JAR archive extractors.

- renames artifacts source from lockfile to artifact, so output will
actually be "artifact:<path to binary>".

- Re-enables some of the image scanning tests that were accidentally removed
in a previous PR. The unrelated snapshot changes come from that.

This PR is not ready to be merged yet; it is waiting on an osv-scalibr
change to be merged first.
(google/osv-scalibr#407)
  • Loading branch information
another-rex authored Jan 28, 2025
1 parent 9d55ac1 commit 3ee976e
Show file tree
Hide file tree
Showing 17 changed files with 729 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
.idea/
/dist/
/osv-scanner
/temp
/coverage.out
/coverage.html
*.tar
Expand Down
447 changes: 447 additions & 0 deletions cmd/osv-scanner/__snapshots__/main_test.snap

Large diffs are not rendered by default.

86 changes: 86 additions & 0 deletions cmd/osv-scanner/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main

import (
"bytes"
"errors"
"os"
"path/filepath"
"reflect"
Expand Down Expand Up @@ -817,6 +818,91 @@ func TestRun_Docker(t *testing.T) {
}
}

// TestRun_OCIImage runs "scan image --archive" against saved image tarballs
// and hands each case to testCli. Cases that reference a tarball under the
// shared image fixtures directory fail early, with a hint, when the tarball
// has not been built yet.
func TestRun_OCIImage(t *testing.T) {
	t.Parallel()

	testutility.SkipIfNotAcceptanceTesting(t, "Not consistent on MacOS/Windows")

	// archiveArgs builds the CLI arguments for scanning one saved image archive.
	archiveArgs := func(tarPath string) []string {
		return []string{"", "scan", "image", "--archive", tarPath}
	}

	tests := []cliTestCase{
		{name: "Invalid path", args: archiveArgs("./fixtures/oci-image/no-file-here.tar"), exit: 127},
		{name: "Alpine 3.10 image tar with 3.18 version file", args: archiveArgs("../../internal/image/fixtures/test-alpine.tar"), exit: 1},
		{name: "Scanning python image with some packages", args: archiveArgs("../../internal/image/fixtures/test-python-full.tar"), exit: 1},
		{name: "Scanning python image with no packages", args: archiveArgs("../../internal/image/fixtures/test-python-empty.tar"), exit: 1},
		{name: "Scanning java image with some packages", args: archiveArgs("../../internal/image/fixtures/test-java-full.tar"), exit: 1},
		{name: "scanning node_modules using npm with no packages", args: archiveArgs("../../internal/image/fixtures/test-node_modules-npm-empty.tar"), exit: 1},
		{name: "scanning node_modules using npm with some packages", args: archiveArgs("../../internal/image/fixtures/test-node_modules-npm-full.tar"), exit: 1},
		{name: "scanning node_modules using yarn with no packages", args: archiveArgs("../../internal/image/fixtures/test-node_modules-yarn-empty.tar"), exit: 1},
		{name: "scanning node_modules using yarn with some packages", args: archiveArgs("../../internal/image/fixtures/test-node_modules-yarn-full.tar"), exit: 1},
		{name: "scanning node_modules using pnpm with no packages", args: archiveArgs("../../internal/image/fixtures/test-node_modules-pnpm-empty.tar"), exit: 1},
		{name: "scanning node_modules using pnpm with some packages", args: archiveArgs("../../internal/image/fixtures/test-node_modules-pnpm-full.tar"), exit: 1},
		{name: "scanning image with go binary", args: archiveArgs("../../internal/image/fixtures/test-package-tracing.tar"), exit: 1},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			// The fixture tarballs are built and saved by a separate script;
			// give a pointed message when one of them is missing.
			for _, candidate := range tc.args {
				isFixtureTar := strings.HasPrefix(candidate, "../../internal/image/fixtures/") &&
					strings.HasSuffix(candidate, ".tar")
				if !isFixtureTar {
					continue
				}

				_, statErr := os.Stat(candidate)
				if errors.Is(statErr, os.ErrNotExist) {
					t.Fatalf("%s does not exist - have you run scripts/build_test_images.sh?", candidate)
				}
			}

			testCli(t, tc)
		})
	}
}

// Tests all subcommands here.
func TestRun_SubCommands(t *testing.T) {
t.Parallel()
Expand Down
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ require (
github.com/go-git/go-billy/v5 v5.6.2
github.com/go-git/go-git/v5 v5.13.1
github.com/google/go-cmp v0.6.0
github.com/google/osv-scalibr v0.1.6-0.20250123155336-85f39dea4c05
github.com/google/osv-scalibr v0.1.6-0.20250128013153-34aef7c77adf
github.com/ianlancetaylor/demangle v0.0.0-20240912202439-0a2b6291aafd
github.com/jedib0t/go-pretty/v6 v6.6.5
github.com/muesli/reflow v0.3.0
Expand Down Expand Up @@ -104,6 +104,7 @@ require (
github.com/mattn/go-runewidth v0.0.16 // indirect
github.com/mattn/go-sqlite3 v1.14.22 // indirect
github.com/microcosm-cc/bluemonday v1.0.27 // indirect
github.com/microsoft/go-rustaudit v0.0.0-20240820110456-0e2abec02f8b // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/moby/locker v1.0.1 // indirect
github.com/moby/sys/mountinfo v0.6.2 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,8 @@ github.com/google/go-containerregistry v0.20.2 h1:B1wPJ1SN/S7pB+ZAimcciVD+r+yV/l
github.com/google/go-containerregistry v0.20.2/go.mod h1:z38EKdKh4h7IP2gSfUUqEvalZBqs6AoLeWfUy34nQC8=
github.com/google/osv-scalibr v0.1.6-0.20250123155336-85f39dea4c05 h1:47dObbqXVFPmg39yLeRWfKZYw2xR6O2BJVLmgC6Zygw=
github.com/google/osv-scalibr v0.1.6-0.20250123155336-85f39dea4c05/go.mod h1:nikSO3CqGGRQY05sGgzsgf4+84p5xCmPWOiaSomkuAU=
github.com/google/osv-scalibr v0.1.6-0.20250128013153-34aef7c77adf h1:s6PZEjcMocRehGjuHIFN7Chy8VlMw4XheLgLaWRx21U=
github.com/google/osv-scalibr v0.1.6-0.20250128013153-34aef7c77adf/go.mod h1:jKAptk1dYWBO91ODkI5XYKDDvZEbLKQH9DSXcTtUDSw=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
Expand Down Expand Up @@ -233,6 +235,8 @@ github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk=
github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA=
github.com/microsoft/go-rustaudit v0.0.0-20240820110456-0e2abec02f8b h1:84JbAJpjZ8p1ttV6dpIqfe8IehWMf0i8DPSgmE9aZuA=
github.com/microsoft/go-rustaudit v0.0.0-20240820110456-0e2abec02f8b/go.mod h1:vYT9HE7WCvL64iVeZylKmCsWKfE+JZ8105iuh2Trk8g=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg=
Expand Down
57 changes: 57 additions & 0 deletions internal/image/fixtures/java-fixture/app/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
<?xml version="1.0" encoding="UTF-8"?>

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.mycompany.app</groupId>
<artifactId>my-app</artifactId>
<version>1.0-SNAPSHOT</version>

<name>my-app</name>
<url>https://osv.dev</url>

<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.4.0</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.21</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>com.mycompany.app.App</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>

<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package com.mycompany.app;

/**
 * Minimal application whose entry point prints a fixed greeting to
 * standard output.
 */
public class App {
    /**
     * Prints the greeting and returns.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        final String greeting = "Hello World!";
        System.out.println(greeting);
    }
}
5 changes: 5 additions & 0 deletions internal/image/fixtures/python-fixture/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
def main():
    """Return the fixed greeting string."""
    greeting = 'Hello, World!'
    return greeting


# Run main() only when this file is executed as a script.
if __name__ == '__main__':
    main()
3 changes: 3 additions & 0 deletions internal/image/fixtures/python-fixture/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
flask==0.12.2 # Vulnerable to CVE-2019-1010083
django==1.11.29 # Vulnerable to CVE-2021-35042
requests==2.20.0 # Vulnerable to CVE-2023-32681 (CVE-2018-18074 was fixed in 2.20.0)
25 changes: 25 additions & 0 deletions internal/image/fixtures/test-java-full.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Use the official OpenJDK image as the base image
# TODO: This has been deprecated and we might want to switch to another image
FROM openjdk:25-jdk-slim@sha256:34f10f3a1a5b638184ebd1c5c1b4aa4c49616ae3e5c1e845f0ac18c5332b5c6f

RUN apt update && apt install -y maven

# Set the working directory inside the container
WORKDIR /app

# Copy the project files into the container
COPY ./java-fixture/app .

# Build the application JAR with Maven (this also downloads the dependencies)
RUN mvn clean package

FROM alpine:3.21@sha256:56fa17d2a7e7f168a043a2712e63aed1f8543aeafdcee47c58dcffe38ed51099

RUN apk update && apk add openjdk21-jre

WORKDIR /app

COPY --from=0 /app/target/my-app-1.0-SNAPSHOT-jar-with-dependencies.jar target.jar

# Set the entry point to run the JAR file
ENTRYPOINT ["java", "-jar", "target.jar"]
11 changes: 11 additions & 0 deletions internal/image/fixtures/test-python-empty.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Use the official Python image (slim-buster variant) as the base
FROM python:3.9-slim-buster@sha256:320a7a4250aba4249f458872adecf92eea88dc6abd2d76dc5c0f01cac9b53990

# Set the working directory in the container
WORKDIR /app

# Copy the application code into the container
COPY python-fixture/main.py main.py

# Specify the command to run when the container starts
CMD ["python", "main.py"]
17 changes: 17 additions & 0 deletions internal/image/fixtures/test-python-full.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Use the official Python image (slim-buster variant) as the base
FROM python:3.9-slim-buster@sha256:320a7a4250aba4249f458872adecf92eea88dc6abd2d76dc5c0f01cac9b53990

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file into the container
COPY ./python-fixture/requirements.txt .

# Install the Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code into the container
COPY python-fixture/main.py main.py

# Specify the command to run when the container starts
CMD ["python", "main.py"]
35 changes: 35 additions & 0 deletions internal/imodels/imodels.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,20 @@ package imodels

import (
"log"
"strings"

"github.com/google/osv-scalibr/extractor"
"github.com/google/osv-scalibr/extractor/filesystem/language/golang/gobinary"
"github.com/google/osv-scalibr/extractor/filesystem/language/java/archive"
"github.com/google/osv-scalibr/extractor/filesystem/language/python/wheelegg"
"github.com/google/osv-scalibr/extractor/filesystem/os/apk"
"github.com/google/osv-scalibr/extractor/filesystem/os/dpkg"
"github.com/google/osv-scalibr/extractor/filesystem/os/rpm"
"github.com/google/osv-scalibr/extractor/filesystem/sbom/cdx"
"github.com/google/osv-scalibr/extractor/filesystem/sbom/spdx"
"github.com/google/osv-scanner/internal/cachedregexp"
"github.com/google/osv-scanner/internal/imodels/ecosystem"
"github.com/google/osv-scanner/internal/scalibrextract/language/javascript/nodemodules"
"github.com/google/osv-scanner/internal/scalibrextract/vcs/gitrepo"
"github.com/google/osv-scanner/pkg/models"
"github.com/ossf/osv-schema/bindings/go/osvschema"
Expand All @@ -32,6 +38,13 @@ var osExtractors = map[string]struct{}{
rpm.Extractor{}.Name(): {},
}

// artifactExtractors is the set of extractor names whose packages
// PackageInfo.SourceType reports as SourceTypeArtifact. It is keyed by
// each extractor's Name().
var artifactExtractors = map[string]struct{}{
nodemodules.Extractor{}.Name(): {},
gobinary.Extractor{}.Name(): {},
archive.Extractor{}.Name(): {},
wheelegg.Extractor{}.Name(): {},
}

// PackageInfo provides getter functions for commonly used fields of inventory
// and applies transformations when required for use in osv-scanner
type PackageInfo struct {
Expand All @@ -47,10 +60,29 @@ func (pkg *PackageInfo) Name() string {
return pkg.purlCache.Name
}

// --- Make specific patches to names as necessary ---
// Patch Go package to stdlib
if pkg.Ecosystem().Ecosystem == osvschema.EcosystemGo && pkg.Inventory.Name == "go" {
return "stdlib"
}

// TODO: Move the normalization to another place, where the matching logic happens.
// Patch python package names to be normalized
if pkg.Ecosystem().Ecosystem == osvschema.EcosystemPyPI {
// per https://peps.python.org/pep-0503/#normalized-names
return strings.ToLower(cachedregexp.MustCompile(`[-_.]+`).ReplaceAllLiteralString(pkg.Inventory.Name, "-"))
}

// Patch Maven archive extractor package names
if metadata, ok := pkg.Inventory.Metadata.(*archive.Metadata); ok {
// Maven packages are named "<groupId>:<artifactId>"
// (fall through to the normal name if either ID is empty)
if metadata.ArtifactID != "" && metadata.GroupID != "" {
return metadata.GroupID + ":" + metadata.ArtifactID
}
}

// --- OS metadata ---
if metadata, ok := pkg.Inventory.Metadata.(*dpkg.Metadata); ok {
// Debian uses source name on osv.dev
// (fallback to using the normal name if source name is empty)
Expand Down Expand Up @@ -124,6 +156,8 @@ func (pkg *PackageInfo) SourceType() SourceType {
return SourceTypeSBOM
} else if _, ok := gitExtractors[extractorName]; ok {
return SourceTypeGit
} else if _, ok := artifactExtractors[extractorName]; ok {
return SourceTypeArtifact
}

return SourceTypeProjectPackage
Expand Down Expand Up @@ -187,6 +221,7 @@ const (
SourceTypeUnknown SourceType = iota
SourceTypeOSPackage
SourceTypeProjectPackage
SourceTypeArtifact
SourceTypeSBOM
SourceTypeGit
)
2 changes: 2 additions & 0 deletions pkg/osvscanner/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"github.com/google/osv-scanner/internal/imodels/results"
"github.com/google/osv-scanner/pkg/models"
"github.com/google/osv-scanner/pkg/reporter"
"github.com/ossf/osv-schema/bindings/go/osvschema"
)

// filterUnscannablePackages removes packages that don't have enough information to be scanned
Expand All @@ -21,6 +22,7 @@ func filterUnscannablePackages(r reporter.Reporter, scanResults *results.ScanRes
// If none of the cases match, skip this package since it's not scannable
case !p.Ecosystem().IsEmpty() && p.Name() != "" && p.Version() != "":
case p.Commit() != "":
case p.Ecosystem().Ecosystem == osvschema.EcosystemMaven && p.Name() == "unknown":
default:
continue
}
Expand Down
Loading

0 comments on commit 3ee976e

Please sign in to comment.