Skip to content

Commit

Permalink
Adding means to enrich reports with atom reachability capabilities.
Browse files Browse the repository at this point in the history
  • Loading branch information
andream16 committed Sep 10, 2024
1 parent 4fe6167 commit 6d4bef5
Show file tree
Hide file tree
Showing 24 changed files with 2,018 additions and 0 deletions.
92 changes: 92 additions & 0 deletions components/enrichers/reachability/cmd/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package main

import (
"context"
"errors"
"fmt"
"log/slog"
"os/signal"
"syscall"

"golang.org/x/sync/errgroup"

"github.com/ocurity/dracon/components/enrichers/reachability/internal/atom"
"github.com/ocurity/dracon/components/enrichers/reachability/internal/atom/purl"
"github.com/ocurity/dracon/components/enrichers/reachability/internal/conf"
"github.com/ocurity/dracon/components/enrichers/reachability/internal/enricher"
"github.com/ocurity/dracon/components/enrichers/reachability/internal/fs"
"github.com/ocurity/dracon/components/enrichers/reachability/internal/logging"
)

// main is the process entry point: it builds a context that is cancelled when
// a termination signal arrives, attaches a logger to it and hands control to
// Main. Any error returned by Main is logged.
func main() {
	// SIGKILL is intentionally not listed: it cannot be caught, blocked or
	// ignored by a process, so registering it with the signal package has no
	// effect (and is reported by `go vet`).
	ctx, cancel := signal.NotifyContext(
		context.Background(),
		syscall.SIGTERM,
		syscall.SIGQUIT,
		syscall.SIGABRT,
		syscall.SIGINT,
	)

	defer cancel()

	logger := logging.NewLogger()
	ctx = logging.WithContext(ctx, logger)

	// NOTE(review): the process still exits with status 0 when Main fails;
	// consider a non-zero exit code if callers rely on it.
	if err := Main(ctx, cancel); err != nil {
		logger.Error("unexpected error", slog.String("err", err.Error()))
	}
}

// Main loads the configuration, builds the purl parser, atom reader,
// filesystem read/writer and enricher, and then runs the enrichment inside an
// errgroup.
//
// cancel is invoked once the enrichment completes successfully so that the
// goroutine waiting on the group context is released. Context errors
// (cancellation or deadline) reported by the group are not treated as
// failures; any other error is wrapped and returned.
func Main(ctx context.Context, cancel func()) error {
	cfg, err := conf.New()
	if err != nil {
		return fmt.Errorf("could not load configuration: %w", err)
	}

	parser, err := purl.NewParser()
	if err != nil {
		return fmt.Errorf("could not initialize purl parser: %w", err)
	}

	reader, err := atom.NewReader(cfg.ATOMFilePath, parser)
	if err != nil {
		return fmt.Errorf("could not initialize atom reader: %w", err)
	}

	readWriter, err := fs.NewReadWriter(cfg.ProducerResultsPath, cfg.EnrichedResultsPath)
	if err != nil {
		return fmt.Errorf("could not initialize filesystem read/writer: %w", err)
	}

	enr, err := enricher.NewEnricher(cfg, reader, readWriter)
	if err != nil {
		return fmt.Errorf("could not initialize enricher: %w", err)
	}

	group, groupCtx := errgroup.WithContext(ctx)

	// Blocks until the group context is done, so that a cancelled context
	// terminates the group early.
	waitForCancellation := func() error {
		<-groupCtx.Done()
		return groupCtx.Err()
	}

	// Runs the enrichment and, on success, cancels the context to release
	// waitForCancellation.
	runEnrichment := func() error {
		enrichErr := enr.Enrich(groupCtx)
		if enrichErr == nil {
			cancel()
			return nil
		}
		return fmt.Errorf("unexpected error while enriching: %w", enrichErr)
	}

	group.Go(waitForCancellation)
	group.Go(runEnrichment)

	waitErr := group.Wait()
	if waitErr == nil || isCtxErr(waitErr) {
		return nil
	}

	return fmt.Errorf("unexpected error in waitgroup: %w", waitErr)
}

// isCtxErr reports whether err is, or wraps, a context cancellation or
// deadline error.
func isCtxErr(err error) bool {
	for _, target := range []error{context.DeadlineExceeded, context.Canceled} {
		if errors.Is(err, target) {
			return true
		}
	}
	return false
}
102 changes: 102 additions & 0 deletions components/enrichers/reachability/cmd/main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package main

import (
"context"
"os"
"path"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

const (
// baseTestdataPath is the relative path of the directory holding the test fixtures.
baseTestdataPath = "../test/testdata"

// Names of the raw and reachability-enriched report files that TestEnricher
// looks up in both the results and the expectations directories.
banditRawFileName = "bandit.raw.pb"
banditEnrichedFileName = "bandit.reachability.enriched.pb"
safetyRawFileName = "pip-safety.raw.pb"
safetyEnrichedFileName = "pip-safety.reachability.enriched.pb"
)

var (
// resultsFilesPath is the directory passed as WRITE_PATH; TestEnricher removes it during cleanup.
resultsFilesPath = path.Join(baseTestdataPath, "results")
// expectedFilesPath is the directory holding the files the results are compared against.
expectedFilesPath = path.Join(baseTestdataPath, "expectations")

// envVars lists the environment variables that TestEnricher sets before calling Main.
envVars = map[string]string{
"READ_PATH": baseTestdataPath,
"WRITE_PATH": resultsFilesPath,
"ATOM_FILE_PATH": path.Join(baseTestdataPath, "reachables.json"),
}
)

func TestEnricher(t *testing.T) {
// Cleanup test bed.
t.Cleanup(func() {
for ev := range envVars {
require.NoError(
t,
os.Unsetenv(ev),
)
}
require.NoError(t, os.RemoveAll(resultsFilesPath))
})

// Setup test bed.
for ev, val := range envVars {
require.NoError(t, os.Setenv(ev, val))
}

t.Run("it correctly cancels and returns earlier", func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond)
defer cancel()
require.NoError(t, Main(ctx, cancel))
})
t.Run("it enriches bandit and safety reports as expected", func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
defer cancel()

require.NoError(t, Main(ctx, cancel))

// Does the results folder exist?
require.DirExists(t, resultsFilesPath)
for _, fp := range []string{
banditRawFileName,
banditEnrichedFileName,
safetyRawFileName,
safetyEnrichedFileName,
} {
// Do all expected result files exist?
resFilePath := getResultPath(t, fp)
require.FileExistsf(t, resFilePath, "result file %s doesn't exist in path", resFilePath)
expFilePath := getExpectedPath(t, fp)
require.FileExistsf(t, resFilePath, "expected file %s doesn't exist in path", expFilePath)

resFile, err := os.ReadFile(resFilePath)
require.NoErrorf(t, err, "could not open results file %s", resFilePath)

expFile, err := os.ReadFile(expFilePath)
require.NoErrorf(t, err, "could not open expectations file %s", expFilePath)

assert.Equalf(
t,
string(resFile),
string(expFile),
"expected file %s doesn't match results file %s",
expFilePath,
resFilePath,
)
}
})
}

// getResultPath returns the path of fileName inside resultsFilesPath.
func getResultPath(t *testing.T, fileName string) string {
	t.Helper()

	resultPath := path.Join(resultsFilesPath, fileName)

	return resultPath
}

// getExpectedPath returns the path of fileName inside expectedFilesPath.
func getExpectedPath(t *testing.T, fileName string) string {
	t.Helper()

	expectedPath := path.Join(expectedFilesPath, fileName)

	return expectedPath
}
143 changes: 143 additions & 0 deletions components/enrichers/reachability/internal/atom/purl/purl.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
package purl

import (
"fmt"
"regexp"
"strings"
)

// purlPkgReplacer cleans up package names captured from a purl. It is built
// once because a strings.Replacer is immutable after construction.
var purlPkgReplacer = strings.NewReplacer(
	// replaces "pypi/" with "".
	"pypi/", "",
	// replaces "npm/" with "".
	"npm/", "",
	// replaces "%40" with "@".
	"%40", "@",
)

// Parser allows to extract information from purls - https://github.com/package-url/purl-spec.
//
// Create it with NewParser; the zero value has nil matchers and cannot be used.
type Parser struct {
	// matcherPurlPkg captures the package part that precedes the "@" of a purl:
	// group "p1" is the last two path segments, group "p2" the last one.
	matcherPurlPkg *regexp.Regexp
	// matcherPurlTrailingVersion matches a "/vN@" or ".vN@" suffix of the package path.
	matcherPurlTrailingVersion *regexp.Regexp
	// matcherPurlVersion captures the version that follows the "@": group "v1"
	// keeps an optional leading "v", group "v2" omits it, and group "ext"
	// holds whatever trails the numeric part up to a "?" or whitespace.
	matcherPurlVersion *regexp.Regexp
}

// NewParser compiles the regular expressions used to parse purls and returns
// a ready-to-use Parser. It returns an error if any pattern fails to compile.
func NewParser() (*Parser, error) {
	purlPkg, err := regexp.Compile(`(?P<p1>[^/:]+/(?P<p2>[^/]+))(?:(?:.|/)v\d+)?@`)
	if err != nil {
		return nil, fmt.Errorf("failed to compile purl pkg regex: %w", err)
	}
	purlTrailingVersion, err := regexp.Compile(`[./]v\d+@`)
	if err != nil {
		return nil, fmt.Errorf("failed to compile purl trailing version regex: %w", err)
	}
	purlVersion, err := regexp.Compile(`@(?P<v1>v?(?P<v2>[\d.]+){1,3})(?P<ext>[^?\s]+)?`)
	if err != nil {
		return nil, fmt.Errorf("failed to compile purl version regex: %w", err)
	}

	return &Parser{
		matcherPurlPkg:             purlPkg,
		matcherPurlTrailingVersion: purlTrailingVersion,
		matcherPurlVersion:         purlVersion,
	}, nil
}

// ParsePurl extracts pkg:version matches from the supplied purl.
//
// Every package name variant is combined with every version variant. The
// result is free of duplicates and has a stable order: package variants in
// the order (p1, p2), and for each of them the versions in the order
// (v1, v2, v1+ext, v2+ext). A purl without a package or a version match
// yields a nil slice. The returned error is currently always nil.
func (p *Parser) ParsePurl(purl string) ([]string, error) {
	// Drop a trailing "/vN" or ".vN" path segment so it does not end up in the
	// package name. The pattern has no capture groups, so the replacement is
	// the literal "@".
	purl = p.matcherPurlTrailingVersion.ReplaceAllLiteralString(purl, "@")

	var pkgs, versions []string

	if match := p.matcherPurlVersion.FindStringSubmatch(purl); len(match) > 0 {
		versions = p.parsePurlVersions(match)
	}

	if match := p.matcherPurlPkg.FindStringSubmatch(purl); len(match) > 0 {
		pkgs = p.parsePurlPkgs(match)
	}

	result := make([]string, 0, len(pkgs)*len(versions))
	for _, pkg := range pkgs {
		for _, version := range versions {
			result = append(result, fmt.Sprintf("%s:%s", pkg, version))
		}
	}

	return p.removeDuplicates(result), nil
}

// parsePurlVersions turns the submatches of matcherPurlVersion into the list
// of distinct version strings: v1, v2 and, when an extension was captured,
// both of them with the extension appended. It returns nil for empty input.
func (p *Parser) parsePurlVersions(matches []string) []string {
	if len(matches) == 0 {
		return nil
	}

	var (
		pattern = p.matcherPurlVersion
		vers1   = matches[pattern.SubexpIndex("v1")]
		vers2   = matches[pattern.SubexpIndex("v2")]
		ext     = matches[pattern.SubexpIndex("ext")]
	)

	// A slice (instead of a map) keeps the output order deterministic.
	versions := []string{vers1, vers2}
	if ext != "" {
		versions = append(versions, vers1+ext, vers2+ext)
	}

	return p.removeDuplicates(versions)
}

// parsePurlPkgs turns the submatches of matcherPurlPkg into the list of
// distinct, cleaned-up package names (p1 first, then p2). It returns nil for
// empty input.
func (p *Parser) parsePurlPkgs(matches []string) []string {
	if len(matches) == 0 {
		return nil
	}

	pattern := p.matcherPurlPkg
	captured := []string{
		matches[pattern.SubexpIndex("p1")],
		matches[pattern.SubexpIndex("p2")],
	}

	pkgs := make([]string, 0, len(captured))
	for _, pkg := range captured {
		pkgs = append(pkgs, purlPkgReplacer.Replace(pkg))
	}

	// Cleaning can make both captures equal (e.g. "pypi/x" and "x").
	return p.removeDuplicates(pkgs)
}

// removeDuplicates returns the entries of matches with repeated values
// dropped, keeping the first occurrence of each and preserving order.
// It returns nil when matches is empty.
func (p *Parser) removeDuplicates(matches []string) []string {
	var (
		result      []string
		encountered = make(map[string]struct{}, len(matches))
	)

	for _, match := range matches {
		if _, ok := encountered[match]; ok {
			continue
		}
		encountered[match] = struct{}{}
		result = append(result, match)
	}

	return result
}
17 changes: 17 additions & 0 deletions components/enrichers/reachability/internal/atom/purl/purl_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package purl_test

import (
"testing"

"github.com/stretchr/testify/require"

"github.com/ocurity/dracon/components/enrichers/reachability/internal/atom/purl"
)

func TestNewParser(t *testing.T) {
t.Run("should return new parser with valid matchers", func(t *testing.T) {
p, err := purl.NewParser()
require.NoError(t, err)
require.NotNil(t, p)
})
}
Loading

0 comments on commit 6d4bef5

Please sign in to comment.