Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding business logic for reachability enricher - leveraging atom. #340

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions components/enrichers/reachability/cmd/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package main
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you are not familiar with the concept: code under an `internal` directory cannot be imported by other Go modules. The build tool reports an error if that is attempted, which results in a failing build.

This ensures that only the API we actually want to expose is visible to potential users. It is a nice built-in Go feature.


import (
"context"
"errors"
"fmt"
"log/slog"
"os/signal"
"syscall"

"golang.org/x/sync/errgroup"

"github.com/ocurity/dracon/components/enrichers/reachability/internal/atom"
"github.com/ocurity/dracon/components/enrichers/reachability/internal/atom/purl"
"github.com/ocurity/dracon/components/enrichers/reachability/internal/conf"
"github.com/ocurity/dracon/components/enrichers/reachability/internal/enricher"
"github.com/ocurity/dracon/components/enrichers/reachability/internal/fs"
"github.com/ocurity/dracon/components/enrichers/reachability/internal/logging"
)

// main is the process entry point. It creates a root context that is
// cancelled on SIGTERM, SIGQUIT, SIGABRT or SIGINT, attaches a logger to that
// context and delegates the actual work to Main.
//
// SIGKILL is intentionally not registered: it can be neither caught nor
// ignored by a program, so listing it in signal.NotifyContext has no effect.
func main() {
	ctx, cancel := signal.NotifyContext(
		context.Background(),
		syscall.SIGTERM,
		syscall.SIGQUIT,
		syscall.SIGABRT,
		syscall.SIGINT,
	)

	defer cancel()

	logger := logging.NewLogger()
	ctx = logging.WithContext(ctx, logger)

	// Errors returned by Main are handled at this single layer: they are logged.
	if err := Main(ctx, cancel); err != nil {
		logger.Error("unexpected error", slog.String("err", err.Error()))
	}
}

func Main(ctx context.Context, cancel func()) error {
cfg, err := conf.New()
if err != nil {
return fmt.Errorf("could not load configuration: %w", err)
}

purlParser, err := purl.NewParser()
if err != nil {
return fmt.Errorf("could not initialize purl parser: %w", err)
}

atomReader, err := atom.NewReader(cfg.ATOMFilePath, purlParser)
if err != nil {
return fmt.Errorf("could not initialize atom reader: %w", err)
}

fsReadWriter, err := fs.NewReadWriter(cfg.ProducerResultsPath, cfg.EnrichedResultsPath)
if err != nil {
return fmt.Errorf("could not initialize filesystem read/writer: %w", err)
}

enr, err := enricher.NewEnricher(cfg, atomReader, fsReadWriter)
if err != nil {
return fmt.Errorf("could not initialize enricher: %w", err)
}

g, egCtx := errgroup.WithContext(ctx)

// Terminates earlier if the context is cancelled.
g.Go(func() error {
<-egCtx.Done()
return egCtx.Err()
})

g.Go(func() error {
if err := enr.Enrich(egCtx); err != nil {
return fmt.Errorf("unexpected error while enriching: %w", err)
}
cancel()
return nil
})

if err := g.Wait(); err != nil && !isCtxErr(err) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gracefully exit when done and handle correctly context cancellations.

return fmt.Errorf("unexpected error in waitgroup: %w", err)
}

return nil
}

// isCtxErr reports whether err is, or wraps, context.DeadlineExceeded or
// context.Canceled.
func isCtxErr(err error) bool {
	for _, ctxErr := range []error{context.DeadlineExceeded, context.Canceled} {
		if errors.Is(err, ctxErr) {
			return true
		}
	}
	return false
}
102 changes: 102 additions & 0 deletions components/enrichers/reachability/cmd/main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package main

import (
"context"
"os"
"path"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// Fixture locations and file names used by TestEnricher.
const (
// baseTestdataPath is the relative path to the root of the test fixtures.
baseTestdataPath = "../test/testdata"

// File names that TestEnricher expects to find both in the results folder
// and in the expectations folder after Main has run.
banditRawFileName = "bandit.raw.pb"
banditEnrichedFileName = "bandit.reachability.enriched.pb"
safetyRawFileName = "pip-safety.raw.pb"
safetyEnrichedFileName = "pip-safety.reachability.enriched.pb"
)

var (
// resultsFilesPath is the folder TestEnricher inspects for produced files;
// it is removed again during test cleanup.
resultsFilesPath = path.Join(baseTestdataPath, "results")
// expectedFilesPath is the folder holding the files results are compared to.
expectedFilesPath = path.Join(baseTestdataPath, "expectations")

// envVars are the environment variables TestEnricher sets before calling
// Main (presumably consumed by conf.New - confirm against the conf package).
envVars = map[string]string{
"READ_PATH": baseTestdataPath,
"WRITE_PATH": resultsFilesPath,
"ATOM_FILE_PATH": path.Join(baseTestdataPath, "reachables.json"),
}
)

func TestEnricher(t *testing.T) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the main test. Executes everything end to end.

// Cleanup test bed.
t.Cleanup(func() {
for ev := range envVars {
require.NoError(
t,
os.Unsetenv(ev),
)
}
require.NoError(t, os.RemoveAll(resultsFilesPath))
})

// Setup test bed.
for ev, val := range envVars {
require.NoError(t, os.Setenv(ev, val))
}

t.Run("it correctly cancels and returns earlier", func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond)
defer cancel()
require.NoError(t, Main(ctx, cancel))
})
t.Run("it enriches bandit and safety reports as expected", func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
defer cancel()

require.NoError(t, Main(ctx, cancel))

// Does the results folder exist?
require.DirExists(t, resultsFilesPath)
for _, fp := range []string{
banditRawFileName,
banditEnrichedFileName,
safetyRawFileName,
safetyEnrichedFileName,
} {
// Do all expected result files exist?
resFilePath := getResultPath(t, fp)
require.FileExistsf(t, resFilePath, "result file %s doesn't exist in path", resFilePath)
expFilePath := getExpectedPath(t, fp)
require.FileExistsf(t, resFilePath, "expected file %s doesn't exist in path", expFilePath)

resFile, err := os.ReadFile(resFilePath)
require.NoErrorf(t, err, "could not open results file %s", resFilePath)

expFile, err := os.ReadFile(expFilePath)
require.NoErrorf(t, err, "could not open expectations file %s", expFilePath)

assert.Equalf(
t,
string(resFile),
string(expFile),
"expected file %s doesn't match results file %s",
expFilePath,
resFilePath,
)
}
})
}

// getResultPath returns the location of fileName inside resultsFilesPath.
// It is marked as a test helper.
func getResultPath(t *testing.T, fileName string) string {
	t.Helper()

	resultPath := path.Join(resultsFilesPath, fileName)

	return resultPath
}

// getExpectedPath returns the location of fileName inside expectedFilesPath.
// It is marked as a test helper.
func getExpectedPath(t *testing.T, fileName string) string {
	t.Helper()

	expectedPath := path.Join(expectedFilesPath, fileName)

	return expectedPath
}
143 changes: 143 additions & 0 deletions components/enrichers/reachability/internal/atom/purl/purl.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
package purl

import (
"fmt"
"regexp"
"strings"
)

// Parser extracts information from purls - https://github.com/package-url/purl-spec.
// Its matchers are compiled by NewParser.
type Parser struct {
// matcherPurlPkg captures the package part of a purl through its named
// groups "p1" and "p2".
matcherPurlPkg *regexp.Regexp
// matcherPurlTrailingVersion is applied by ParsePurl before extraction to
// rewrite each of its matches to a plain "@".
matcherPurlTrailingVersion *regexp.Regexp
// matcherPurlVersion captures the version part of a purl through its named
// groups "v1", "v2" and "ext".
matcherPurlVersion *regexp.Regexp
}

func NewParser() (*Parser, error) {
purlPkg, err := regexp.Compile(`(?P<p1>[^/:]+/(?P<p2>[^/]+))(?:(?:.|/)v\d+)?@`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why are we using regexp to parse this and not https://github.com/package-url/packageurl-go?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will look into it!

if err != nil {
return nil, fmt.Errorf("failed to compile purl pkg regex: %w", err)
}
purlTrailingVersion, err := regexp.Compile(`[./]v\d+@`)
if err != nil {
return nil, fmt.Errorf("failed to compile purl trailing version regex: %w", err)
}
purlVersion, err := regexp.Compile(`@(?P<v1>v?(?P<v2>[\d.]+){1,3})(?P<ext>[^?\s]+)?`)
if err != nil {
return nil, fmt.Errorf("failed to compile purl version regex: %w", err)
}

return &Parser{
matcherPurlPkg: purlPkg,
matcherPurlTrailingVersion: purlTrailingVersion,
matcherPurlVersion: purlVersion,
}, nil
}

// ParsePurl extracts pkg:version matches from the supplied purl.
//
// Every package name found is combined with every version found, and
// duplicates are removed from the combined list. The returned error is
// always nil.
func (p *Parser) ParsePurl(purl string) ([]string, error) {
	// The trailing-version pattern defines no capture group, so "$1" in the
	// template expands to the empty string and each match becomes "@".
	normalized := p.matcherPurlTrailingVersion.ReplaceAllString(purl, "$1@")

	var versions []string
	if versionMatch := p.matcherPurlVersion.FindStringSubmatch(normalized); len(versionMatch) > 0 {
		versions = p.parsePurlVersions(versionMatch)
	}

	var pkgs []string
	if pkgMatch := p.matcherPurlPkg.FindStringSubmatch(normalized); len(pkgMatch) > 0 {
		pkgs = p.parsePurlPkgs(pkgMatch)
	}

	var combined []string
	for i := range pkgs {
		for j := range versions {
			combined = append(combined, fmt.Sprintf("%s:%s", pkgs[i], versions[j]))
		}
	}

	return p.removeDuplicates(combined), nil
}

// parsePurlVersions turns a submatch slice produced by matcherPurlVersion
// into the list of distinct version strings: the "v1" and "v2" captures and,
// when the "ext" capture is non-empty, each of them with ext appended.
//
// The result is returned in a fixed order (v1, v2, v1+ext, v2+ext, minus
// duplicates) so that callers get the same output on every run; collecting
// the values by ranging over a map would make the order random. An empty
// matches slice yields an empty, non-nil slice.
func (p *Parser) parsePurlVersions(matches []string) []string {
	if len(matches) == 0 {
		return make([]string, 0)
	}

	var (
		pattern = p.matcherPurlVersion

		// Assuming the named groups are in the match.
		vers1 = matches[pattern.SubexpIndex("v1")]
		vers2 = matches[pattern.SubexpIndex("v2")]
		ext   = matches[pattern.SubexpIndex("ext")]

		// Adding the basic versions.
		candidates = []string{vers1, vers2}
	)

	// Adding the extended versions if ext exists.
	if ext != "" {
		candidates = append(candidates, vers1+ext, vers2+ext)
	}

	var (
		versions = make([]string, 0, len(candidates))
		seen     = make(map[string]struct{}, len(candidates))
	)

	// Keep the first occurrence of every version, preserving candidate order.
	for _, version := range candidates {
		if _, dup := seen[version]; dup {
			continue
		}
		seen[version] = struct{}{}
		versions = append(versions, version)
	}

	return versions
}

// parsePurlPkgs turns a submatch slice produced by matcherPurlPkg into the
// list of distinct package names taken from the "p1" and "p2" captures.
//
// Each capture is cleaned first ("pypi/" and "npm/" are stripped, "%40" is
// decoded to "@") and only then checked for uniqueness, so two captures that
// clean to the same name are reported once. The result keeps a fixed order
// (p1 before p2) instead of the random order of a map iteration. An empty
// matches slice yields an empty, non-nil slice.
func (p *Parser) parsePurlPkgs(matches []string) []string {
	if len(matches) == 0 {
		return make([]string, 0)
	}

	var (
		pattern        = p.matcherPurlPkg
		pkgStrReplacer = strings.NewReplacer(
			// replaces "pypi/" with "".
			"pypi/", "",
			// replaces "npm/" with "".
			"npm/", "",
			// replaces "%40" with "@".
			"%40", "@",
		)

		// Adding the packages.
		captures = []string{
			matches[pattern.SubexpIndex("p1")],
			matches[pattern.SubexpIndex("p2")],
		}

		pkgs = make([]string, 0, len(captures))
		seen = make(map[string]struct{}, len(captures))
	)

	// Clean up each package and keep its first occurrence only.
	for _, capture := range captures {
		pkg := pkgStrReplacer.Replace(capture)
		if _, dup := seen[pkg]; dup {
			continue
		}
		seen[pkg] = struct{}{}
		pkgs = append(pkgs, pkg)
	}

	return pkgs
}

// removeDuplicates returns the entries of matches with repeated values
// dropped, keeping the first occurrence of each value in its original
// position. When matches has no entries the result is nil.
func (p *Parser) removeDuplicates(matches []string) []string {
	seen := make(map[string]struct{})

	var unique []string
	for _, candidate := range matches {
		if _, dup := seen[candidate]; !dup {
			seen[candidate] = struct{}{}
			unique = append(unique, candidate)
		}
	}

	return unique
}
17 changes: 17 additions & 0 deletions components/enrichers/reachability/internal/atom/purl/purl_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package purl_test

import (
"testing"

"github.com/stretchr/testify/require"

"github.com/ocurity/dracon/components/enrichers/reachability/internal/atom/purl"
)

// TestNewParser checks that purl.NewParser succeeds and hands back a non-nil
// parser. The matchers themselves are not inspected.
func TestNewParser(t *testing.T) {
	t.Run("should return new parser with valid matchers", func(t *testing.T) {
		parser, err := purl.NewParser()

		require.NoError(t, err)
		require.NotNil(t, parser)
	})
}
Loading
Loading