Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add artifact extractors #1531

Merged
merged 16 commits into from
Jan 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
.idea/
/dist/
/osv-scanner
/temp
/coverage.out
/coverage.html
*.tar
Expand Down
447 changes: 447 additions & 0 deletions cmd/osv-scanner/__snapshots__/main_test.snap

Large diffs are not rendered by default.

86 changes: 86 additions & 0 deletions cmd/osv-scanner/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main

import (
"bytes"
"errors"
"os"
"path/filepath"
"reflect"
Expand Down Expand Up @@ -817,6 +818,91 @@ func TestRun_Docker(t *testing.T) {
}
}

// TestRun_OCIImage runs the "scan image --archive" subcommand against saved
// image tarballs and checks each run through testCli.
//
// The fixture tarballs under ../../internal/image/fixtures/ are not checked in;
// a missing tarball fails the subtest with a hint to run
// scripts/build_test_images.sh.
func TestRun_OCIImage(t *testing.T) {
	t.Parallel()

	testutility.SkipIfNotAcceptanceTesting(t, "Not consistent on MacOS/Windows")

	// imageArchiveArgs builds the CLI arguments for scanning one image archive.
	imageArchiveArgs := func(archivePath string) []string {
		return []string{"", "scan", "image", "--archive", archivePath}
	}

	cases := []cliTestCase{
		{
			name: "Invalid path",
			args: imageArchiveArgs("./fixtures/oci-image/no-file-here.tar"),
			exit: 127,
		},
		{
			name: "Alpine 3.10 image tar with 3.18 version file",
			args: imageArchiveArgs("../../internal/image/fixtures/test-alpine.tar"),
			exit: 1,
		},
		{
			name: "Scanning python image with some packages",
			args: imageArchiveArgs("../../internal/image/fixtures/test-python-full.tar"),
			exit: 1,
		},
		{
			name: "Scanning python image with no packages",
			args: imageArchiveArgs("../../internal/image/fixtures/test-python-empty.tar"),
			exit: 1,
		},
		{
			name: "Scanning java image with some packages",
			args: imageArchiveArgs("../../internal/image/fixtures/test-java-full.tar"),
			exit: 1,
		},
		{
			name: "scanning node_modules using npm with no packages",
			args: imageArchiveArgs("../../internal/image/fixtures/test-node_modules-npm-empty.tar"),
			exit: 1,
		},
		{
			name: "scanning node_modules using npm with some packages",
			args: imageArchiveArgs("../../internal/image/fixtures/test-node_modules-npm-full.tar"),
			exit: 1,
		},
		{
			name: "scanning node_modules using yarn with no packages",
			args: imageArchiveArgs("../../internal/image/fixtures/test-node_modules-yarn-empty.tar"),
			exit: 1,
		},
		{
			name: "scanning node_modules using yarn with some packages",
			args: imageArchiveArgs("../../internal/image/fixtures/test-node_modules-yarn-full.tar"),
			exit: 1,
		},
		{
			name: "scanning node_modules using pnpm with no packages",
			args: imageArchiveArgs("../../internal/image/fixtures/test-node_modules-pnpm-empty.tar"),
			exit: 1,
		},
		{
			name: "scanning node_modules using pnpm with some packages",
			args: imageArchiveArgs("../../internal/image/fixtures/test-node_modules-pnpm-full.tar"),
			exit: 1,
		},
		{
			name: "scanning image with go binary",
			args: imageArchiveArgs("../../internal/image/fixtures/test-package-tracing.tar"),
			exit: 1,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			// The fixture images have to be built and saved separately, so
			// fail with a pointer to the build script when one is missing.
			for _, a := range tc.args {
				isFixtureTar := strings.HasPrefix(a, "../../internal/image/fixtures/") && strings.HasSuffix(a, ".tar")
				if !isFixtureTar {
					continue
				}

				_, err := os.Stat(a)
				if errors.Is(err, os.ErrNotExist) {
					t.Fatalf("%s does not exist - have you run scripts/build_test_images.sh?", a)
				}
			}

			testCli(t, tc)
		})
	}
}

// Tests all subcommands here.
func TestRun_SubCommands(t *testing.T) {
t.Parallel()
Expand Down
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ require (
github.com/go-git/go-billy/v5 v5.6.2
github.com/go-git/go-git/v5 v5.13.1
github.com/google/go-cmp v0.6.0
github.com/google/osv-scalibr v0.1.6-0.20250123155336-85f39dea4c05
github.com/google/osv-scalibr v0.1.6-0.20250128013153-34aef7c77adf
github.com/ianlancetaylor/demangle v0.0.0-20240912202439-0a2b6291aafd
github.com/jedib0t/go-pretty/v6 v6.6.5
github.com/muesli/reflow v0.3.0
Expand Down Expand Up @@ -104,6 +104,7 @@ require (
github.com/mattn/go-runewidth v0.0.16 // indirect
github.com/mattn/go-sqlite3 v1.14.22 // indirect
github.com/microcosm-cc/bluemonday v1.0.27 // indirect
github.com/microsoft/go-rustaudit v0.0.0-20240820110456-0e2abec02f8b // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/moby/locker v1.0.1 // indirect
github.com/moby/sys/mountinfo v0.6.2 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,8 @@ github.com/google/go-containerregistry v0.20.2 h1:B1wPJ1SN/S7pB+ZAimcciVD+r+yV/l
github.com/google/go-containerregistry v0.20.2/go.mod h1:z38EKdKh4h7IP2gSfUUqEvalZBqs6AoLeWfUy34nQC8=
github.com/google/osv-scalibr v0.1.6-0.20250123155336-85f39dea4c05 h1:47dObbqXVFPmg39yLeRWfKZYw2xR6O2BJVLmgC6Zygw=
github.com/google/osv-scalibr v0.1.6-0.20250123155336-85f39dea4c05/go.mod h1:nikSO3CqGGRQY05sGgzsgf4+84p5xCmPWOiaSomkuAU=
github.com/google/osv-scalibr v0.1.6-0.20250128013153-34aef7c77adf h1:s6PZEjcMocRehGjuHIFN7Chy8VlMw4XheLgLaWRx21U=
github.com/google/osv-scalibr v0.1.6-0.20250128013153-34aef7c77adf/go.mod h1:jKAptk1dYWBO91ODkI5XYKDDvZEbLKQH9DSXcTtUDSw=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
Expand Down Expand Up @@ -233,6 +235,8 @@ github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk=
github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA=
github.com/microsoft/go-rustaudit v0.0.0-20240820110456-0e2abec02f8b h1:84JbAJpjZ8p1ttV6dpIqfe8IehWMf0i8DPSgmE9aZuA=
github.com/microsoft/go-rustaudit v0.0.0-20240820110456-0e2abec02f8b/go.mod h1:vYT9HE7WCvL64iVeZylKmCsWKfE+JZ8105iuh2Trk8g=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg=
Expand Down
57 changes: 57 additions & 0 deletions internal/image/fixtures/java-fixture/app/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
<?xml version="1.0" encoding="UTF-8"?>

<!-- Maven project for the Java image fixture (my-app). -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.mycompany.app</groupId>
<artifactId>my-app</artifactId>
<version>1.0-SNAPSHOT</version>

<name>my-app</name>
<url>https://osv.dev</url>

<!-- Third-party dependencies, pinned to fixed versions. -->
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.4.0</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.21</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Runs the assembly plugin's "single" goal in the package phase using the
jar-with-dependencies descriptor, with com.mycompany.app.App as the manifest main class. -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>com.mycompany.app.App</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>

<!-- Compiler source/target level (1.8). -->
<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package com.mycompany.app;

/**
 * Minimal entry point for the Java fixture application.
 * Prints a fixed greeting to standard output.
 */
public class App {
    /**
     * Prints the greeting.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        final String greeting = "Hello World!";
        System.out.println(greeting);
    }
}
5 changes: 5 additions & 0 deletions internal/image/fixtures/python-fixture/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
def main():
    """Return the fixture application's greeting string."""
    greeting = 'Hello, World!'
    return greeting


if __name__ == '__main__':
    # Run the entry point when executed as a script; the result is not printed.
    main()
3 changes: 3 additions & 0 deletions internal/image/fixtures/python-fixture/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
flask==0.12.2 # Vulnerable to CVE-2019-1010083
django==1.11.29 # Vulnerable to CVE-2021-35042
requests==2.20.0 # Vulnerable to CVE-2018-18074
25 changes: 25 additions & 0 deletions internal/image/fixtures/test-java-full.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Build stage: use the official OpenJDK image (pinned by digest) as the base image
# TODO: This has been deprecated and we might want to switch to another image
FROM openjdk:25-jdk-slim@sha256:34f10f3a1a5b638184ebd1c5c1b4aa4c49616ae3e5c1e845f0ac18c5332b5c6f

# Install Maven, which is used below to build the project
RUN apt update && apt install -y maven

# Set the working directory inside the container
WORKDIR /app

# Copy the project files into the container
COPY ./java-fixture/app .

# Build the project with Maven; this produces the jar-with-dependencies copied below
RUN mvn clean package

# Final stage: Alpine base image (pinned by digest) that only receives the built jar
FROM alpine:3.21@sha256:56fa17d2a7e7f168a043a2712e63aed1f8543aeafdcee47c58dcffe38ed51099

# Install the OpenJDK 21 JRE package
RUN apk update && apk add openjdk21-jre

# Set the working directory inside the container
WORKDIR /app

# Copy the assembled jar from the build stage (stage 0) as target.jar
COPY --from=0 /app/target/my-app-1.0-SNAPSHOT-jar-with-dependencies.jar target.jar

# Set the entry point to run the JAR file
ENTRYPOINT ["java", "-jar", "target.jar"]
11 changes: 11 additions & 0 deletions internal/image/fixtures/test-python-empty.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Use the official Python 3.9 image (slim-buster variant, pinned by digest) as the base
FROM python:3.9-slim-buster@sha256:320a7a4250aba4249f458872adecf92eea88dc6abd2d76dc5c0f01cac9b53990

# Set the working directory in the container
WORKDIR /app

# Copy the application code into the container
# (this image installs no Python dependencies)
COPY python-fixture/main.py main.py

# Specify the command to run when the container starts
CMD ["python", "main.py"]
17 changes: 17 additions & 0 deletions internal/image/fixtures/test-python-full.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Use the official Python 3.9 image (slim-buster variant, pinned by digest) as the base
FROM python:3.9-slim-buster@sha256:320a7a4250aba4249f458872adecf92eea88dc6abd2d76dc5c0f01cac9b53990

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file into the container
COPY ./python-fixture/requirements.txt .

# Install the Python dependencies listed in requirements.txt
# (--no-cache-dir stops pip from keeping a download cache in the image)
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code into the container
COPY python-fixture/main.py main.py

# Specify the command to run when the container starts
CMD ["python", "main.py"]
35 changes: 35 additions & 0 deletions internal/imodels/imodels.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,20 @@ package imodels

import (
"log"
"strings"

"github.com/google/osv-scalibr/extractor"
"github.com/google/osv-scalibr/extractor/filesystem/language/golang/gobinary"
"github.com/google/osv-scalibr/extractor/filesystem/language/java/archive"
"github.com/google/osv-scalibr/extractor/filesystem/language/python/wheelegg"
"github.com/google/osv-scalibr/extractor/filesystem/os/apk"
"github.com/google/osv-scalibr/extractor/filesystem/os/dpkg"
"github.com/google/osv-scalibr/extractor/filesystem/os/rpm"
"github.com/google/osv-scalibr/extractor/filesystem/sbom/cdx"
"github.com/google/osv-scalibr/extractor/filesystem/sbom/spdx"
"github.com/google/osv-scanner/internal/cachedregexp"
"github.com/google/osv-scanner/internal/imodels/ecosystem"
"github.com/google/osv-scanner/internal/scalibrextract/language/javascript/nodemodules"
"github.com/google/osv-scanner/internal/scalibrextract/vcs/gitrepo"
"github.com/google/osv-scanner/pkg/models"
"github.com/ossf/osv-schema/bindings/go/osvschema"
Expand All @@ -32,6 +38,13 @@ var osExtractors = map[string]struct{}{
rpm.Extractor{}.Name(): {},
}

// artifactExtractors is the set of extractor names (node_modules, Go binary,
// Java archive and Python wheel/egg) whose packages SourceType reports as
// SourceTypeArtifact.
var artifactExtractors = map[string]struct{}{
nodemodules.Extractor{}.Name(): {},
gobinary.Extractor{}.Name(): {},
archive.Extractor{}.Name(): {},
wheelegg.Extractor{}.Name(): {},
}

// PackageInfo provides getter functions for commonly used fields of inventory
// and applies transformations when required for use in osv-scanner
type PackageInfo struct {
Expand All @@ -47,10 +60,29 @@ func (pkg *PackageInfo) Name() string {
return pkg.purlCache.Name
}

// --- Make specific patches to names as necessary ---
// Patch Go package to stdlib
if pkg.Ecosystem().Ecosystem == osvschema.EcosystemGo && pkg.Inventory.Name == "go" {
return "stdlib"
}

// TODO: Move the normalization to another place, where the matching logic happens.
// Patch python package names to be normalized
if pkg.Ecosystem().Ecosystem == osvschema.EcosystemPyPI {
// per https://peps.python.org/pep-0503/#normalized-names
return strings.ToLower(cachedregexp.MustCompile(`[-_.]+`).ReplaceAllLiteralString(pkg.Inventory.Name, "-"))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we add a TODO to remove this after the API query change is deployed to production?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep good catch, I'll probably remove this entirely since I don't plan on merging this PR in until next week.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm it looks like there's more work to be done here:

  1. Offline scanning also needs to do this normalization
  2. The code for identifying fix available/not available also needs this change.
    Added a TODO.

}

// Patch Maven archive extractor package names
if metadata, ok := pkg.Inventory.Metadata.(*archive.Metadata); ok {
// Maven package names are reported as "<groupId>:<artifactId>"
// (falls through to the remaining name handling if either part is empty)
if metadata.ArtifactID != "" && metadata.GroupID != "" {
return metadata.GroupID + ":" + metadata.ArtifactID
}
}

// --- OS metadata ---
if metadata, ok := pkg.Inventory.Metadata.(*dpkg.Metadata); ok {
// Debian uses source name on osv.dev
// (fallback to using the normal name if source name is empty)
Expand Down Expand Up @@ -124,6 +156,8 @@ func (pkg *PackageInfo) SourceType() SourceType {
return SourceTypeSBOM
} else if _, ok := gitExtractors[extractorName]; ok {
return SourceTypeGit
} else if _, ok := artifactExtractors[extractorName]; ok {
return SourceTypeArtifact
}

return SourceTypeProjectPackage
Expand Down Expand Up @@ -187,6 +221,7 @@ const (
SourceTypeUnknown SourceType = iota
SourceTypeOSPackage
SourceTypeProjectPackage
SourceTypeArtifact
SourceTypeSBOM
SourceTypeGit
)
2 changes: 2 additions & 0 deletions pkg/osvscanner/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"github.com/google/osv-scanner/internal/imodels/results"
"github.com/google/osv-scanner/pkg/models"
"github.com/google/osv-scanner/pkg/reporter"
"github.com/ossf/osv-schema/bindings/go/osvschema"
)

// filterUnscannablePackages removes packages that don't have enough information to be scanned
Expand All @@ -21,6 +22,7 @@ func filterUnscannablePackages(r reporter.Reporter, scanResults *results.ScanRes
// If none of the cases match, skip this package since it's not scannable
case !p.Ecosystem().IsEmpty() && p.Name() != "" && p.Version() != "":
case p.Commit() != "":
case p.Ecosystem().Ecosystem == osvschema.EcosystemMaven && p.Name() == "unknown":
default:
continue
}
Expand Down
Loading