-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding business logic for reachability enricher - leveraging atom. #340
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
package main | ||
|
||
import ( | ||
"context" | ||
"errors" | ||
"fmt" | ||
"log/slog" | ||
"os/signal" | ||
"syscall" | ||
|
||
"golang.org/x/sync/errgroup" | ||
|
||
"github.com/ocurity/dracon/components/enrichers/reachability/internal/atom" | ||
"github.com/ocurity/dracon/components/enrichers/reachability/internal/atom/purl" | ||
"github.com/ocurity/dracon/components/enrichers/reachability/internal/conf" | ||
"github.com/ocurity/dracon/components/enrichers/reachability/internal/enricher" | ||
"github.com/ocurity/dracon/components/enrichers/reachability/internal/fs" | ||
"github.com/ocurity/dracon/components/enrichers/reachability/internal/logging" | ||
) | ||
|
||
func main() { | ||
ctx, cancel := signal.NotifyContext( | ||
context.Background(), | ||
syscall.SIGTERM, | ||
syscall.SIGQUIT, | ||
syscall.SIGABRT, | ||
syscall.SIGINT, | ||
syscall.SIGKILL, | ||
) | ||
|
||
defer cancel() | ||
|
||
logger := logging.NewLogger() | ||
ctx = logging.WithContext(ctx, logger) | ||
|
||
if err := Main(ctx, cancel); err != nil { | ||
logger.Error("unexpected error", slog.String("err", err.Error())) | ||
} | ||
} | ||
|
||
func Main(ctx context.Context, cancel func()) error { | ||
cfg, err := conf.New() | ||
if err != nil { | ||
return fmt.Errorf("could not load configuration: %w", err) | ||
} | ||
|
||
purlParser, err := purl.NewParser() | ||
if err != nil { | ||
return fmt.Errorf("could not initialize purl parser: %w", err) | ||
} | ||
|
||
atomReader, err := atom.NewReader(cfg.ATOMFilePath, purlParser) | ||
if err != nil { | ||
return fmt.Errorf("could not initialize atom reader: %w", err) | ||
} | ||
|
||
fsReadWriter, err := fs.NewReadWriter(cfg.ProducerResultsPath, cfg.EnrichedResultsPath) | ||
if err != nil { | ||
return fmt.Errorf("could not initialize filesystem read/writer: %w", err) | ||
} | ||
|
||
enr, err := enricher.NewEnricher(cfg, atomReader, fsReadWriter) | ||
if err != nil { | ||
return fmt.Errorf("could not initialize enricher: %w", err) | ||
} | ||
|
||
g, egCtx := errgroup.WithContext(ctx) | ||
|
||
// Terminates earlier if the context is cancelled. | ||
g.Go(func() error { | ||
<-egCtx.Done() | ||
return egCtx.Err() | ||
}) | ||
|
||
g.Go(func() error { | ||
if err := enr.Enrich(egCtx); err != nil { | ||
return fmt.Errorf("unexpected error while enriching: %w", err) | ||
} | ||
cancel() | ||
return nil | ||
}) | ||
|
||
if err := g.Wait(); err != nil && !isCtxErr(err) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Gracefully exit when done and handle correctly context cancellations. |
||
return fmt.Errorf("unexpected error in waitgroup: %w", err) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// isCtxErr reports whether err is, or wraps, context.DeadlineExceeded or
// context.Canceled.
func isCtxErr(err error) bool {
	switch {
	case errors.Is(err, context.DeadlineExceeded):
		return true
	case errors.Is(err, context.Canceled):
		return true
	default:
		return false
	}
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
package main | ||
|
||
import ( | ||
"context" | ||
"os" | ||
"path" | ||
"testing" | ||
"time" | ||
|
||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
// Locations and file names of the fixtures used by the end-to-end test.
const (
	// baseTestdataPath is the root directory of the test data.
	baseTestdataPath = "../test/testdata"

	// Raw and enriched file names for the bandit and pip-safety reports.
	banditRawFileName      = "bandit.raw.pb"
	banditEnrichedFileName = "bandit.reachability.enriched.pb"
	safetyRawFileName      = "pip-safety.raw.pb"
	safetyEnrichedFileName = "pip-safety.reachability.enriched.pb"
)

var (
	// resultsFilesPath is where result files are looked up (and removed on cleanup).
	resultsFilesPath = path.Join(baseTestdataPath, "results")
	// expectedFilesPath holds the files the results are compared against.
	expectedFilesPath = path.Join(baseTestdataPath, "expectations")

	// envVars are the environment variables set for the duration of the test.
	// NOTE(review): presumably these are read by the configuration loader — confirm.
	envVars = map[string]string{
		"READ_PATH":      baseTestdataPath,
		"WRITE_PATH":     resultsFilesPath,
		"ATOM_FILE_PATH": path.Join(baseTestdataPath, "reachables.json"),
	}
)
|
||
func TestEnricher(t *testing.T) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the main test. Executes everything end to end. |
||
// Cleanup test bed. | ||
t.Cleanup(func() { | ||
for ev := range envVars { | ||
require.NoError( | ||
t, | ||
os.Unsetenv(ev), | ||
) | ||
} | ||
require.NoError(t, os.RemoveAll(resultsFilesPath)) | ||
}) | ||
|
||
// Setup test bed. | ||
for ev, val := range envVars { | ||
require.NoError(t, os.Setenv(ev, val)) | ||
} | ||
|
||
t.Run("it correctly cancels and returns earlier", func(t *testing.T) { | ||
ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) | ||
defer cancel() | ||
require.NoError(t, Main(ctx, cancel)) | ||
}) | ||
t.Run("it enriches bandit and safety reports as expected", func(t *testing.T) { | ||
ctx, cancel := context.WithTimeout(context.Background(), time.Minute) | ||
defer cancel() | ||
|
||
require.NoError(t, Main(ctx, cancel)) | ||
|
||
// Does the results folder exist? | ||
require.DirExists(t, resultsFilesPath) | ||
for _, fp := range []string{ | ||
banditRawFileName, | ||
banditEnrichedFileName, | ||
safetyRawFileName, | ||
safetyEnrichedFileName, | ||
} { | ||
// Do all expected result files exist? | ||
resFilePath := getResultPath(t, fp) | ||
require.FileExistsf(t, resFilePath, "result file %s doesn't exist in path", resFilePath) | ||
expFilePath := getExpectedPath(t, fp) | ||
require.FileExistsf(t, resFilePath, "expected file %s doesn't exist in path", expFilePath) | ||
|
||
resFile, err := os.ReadFile(resFilePath) | ||
require.NoErrorf(t, err, "could not open results file %s", resFilePath) | ||
|
||
expFile, err := os.ReadFile(expFilePath) | ||
require.NoErrorf(t, err, "could not open expectations file %s", expFilePath) | ||
|
||
assert.Equalf( | ||
t, | ||
string(resFile), | ||
string(expFile), | ||
"expected file %s doesn't match results file %s", | ||
expFilePath, | ||
resFilePath, | ||
) | ||
} | ||
}) | ||
} | ||
|
||
func getResultPath(t *testing.T, fileName string) string { | ||
t.Helper() | ||
return path.Join(resultsFilesPath, fileName) | ||
} | ||
|
||
func getExpectedPath(t *testing.T, fileName string) string { | ||
t.Helper() | ||
return path.Join(expectedFilesPath, fileName) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
package purl | ||
|
||
import ( | ||
"fmt" | ||
"regexp" | ||
"strings" | ||
) | ||
|
||
// Parser extracts package and version information from package URLs (purls),
// see https://github.com/package-url/purl-spec.
type Parser struct {
	// matcherPurlPkg captures the package name groups ("p1" and "p2").
	matcherPurlPkg *regexp.Regexp
	// matcherPurlTrailingVersion matches a ".vN@" or "/vN@" segment.
	matcherPurlTrailingVersion *regexp.Regexp
	// matcherPurlVersion captures the version groups ("v1", "v2" and "ext").
	matcherPurlVersion *regexp.Regexp
}
|
||
func NewParser() (*Parser, error) { | ||
purlPkg, err := regexp.Compile(`(?P<p1>[^/:]+/(?P<p2>[^/]+))(?:(?:.|/)v\d+)?@`) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are we using regexp to parse this and not https://github.com/package-url/packageurl-go? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will look into it! |
||
if err != nil { | ||
return nil, fmt.Errorf("failed to compile purl pkg regex: %w", err) | ||
} | ||
purlTrailingVersion, err := regexp.Compile(`[./]v\d+@`) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to compile purl trailing version regex: %w", err) | ||
} | ||
purlVersion, err := regexp.Compile(`@(?P<v1>v?(?P<v2>[\d.]+){1,3})(?P<ext>[^?\s]+)?`) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to compile purl version regex: %w", err) | ||
} | ||
|
||
return &Parser{ | ||
matcherPurlPkg: purlPkg, | ||
matcherPurlTrailingVersion: purlTrailingVersion, | ||
matcherPurlVersion: purlVersion, | ||
}, nil | ||
} | ||
|
||
// ParsePurl extracts pkg:version matches from the supplied purl. | ||
func (p *Parser) ParsePurl(purl string) ([]string, error) { | ||
purl = p.matcherPurlTrailingVersion.ReplaceAllString(purl, "$1@") | ||
|
||
var ( | ||
result []string | ||
pkgs []string | ||
versions []string | ||
) | ||
|
||
if match := p.matcherPurlVersion.FindStringSubmatch(purl); len(match) > 0 { | ||
versions = p.parsePurlVersions(match) | ||
} | ||
|
||
if match := p.matcherPurlPkg.FindStringSubmatch(purl); len(match) > 0 { | ||
pkgs = p.parsePurlPkgs(match) | ||
} | ||
|
||
for _, pkg := range pkgs { | ||
for _, version := range versions { | ||
result = append(result, fmt.Sprintf("%s:%s", pkg, version)) | ||
} | ||
} | ||
|
||
return p.removeDuplicates(result), nil | ||
} | ||
|
||
func (p *Parser) parsePurlVersions(matches []string) []string { | ||
if len(matches) == 0 { | ||
return make([]string, 0) | ||
} | ||
|
||
var ( | ||
pattern = p.matcherPurlVersion | ||
versions []string | ||
// Creating a map to ensure uniqueness | ||
versionSet = make(map[string]struct{}) | ||
|
||
// Assuming the named groups are in the match | ||
vers1 = matches[pattern.SubexpIndex("v1")] | ||
vers2 = matches[pattern.SubexpIndex("v2")] | ||
ext = matches[pattern.SubexpIndex("ext")] | ||
) | ||
|
||
// Adding the basic versions | ||
versionSet[vers1] = struct{}{} | ||
versionSet[vers2] = struct{}{} | ||
|
||
// Adding the extended versions if ext exists | ||
if ext != "" { | ||
versionSet[vers1+ext] = struct{}{} | ||
versionSet[vers2+ext] = struct{}{} | ||
} | ||
|
||
// Converting the map to a slice | ||
for version := range versionSet { | ||
versions = append(versions, version) | ||
} | ||
|
||
return versions | ||
} | ||
|
||
func (p *Parser) parsePurlPkgs(matches []string) []string { | ||
var ( | ||
pattern = p.matcherPurlPkg | ||
// Creating a map to ensure uniqueness | ||
pkgSet = make(map[string]struct{}) | ||
pkgs []string | ||
pkgStrReplacer = strings.NewReplacer( | ||
// replaces "pypi/" with "". | ||
"pypi/", "", | ||
// replaces "npm/" with "". | ||
"npm/", "", | ||
// replaces "%40/" with "@". | ||
"%40", "@", | ||
) | ||
) | ||
|
||
// Adding the packages | ||
pkgSet[matches[pattern.SubexpIndex("p1")]] = struct{}{} | ||
pkgSet[matches[pattern.SubexpIndex("p2")]] = struct{}{} | ||
|
||
// Converting the map to a slice and cleaning up the packages | ||
for pkg := range pkgSet { | ||
pkgs = append(pkgs, pkgStrReplacer.Replace(pkg)) | ||
} | ||
|
||
return pkgs | ||
} | ||
|
||
func (p *Parser) removeDuplicates(matches []string) []string { | ||
var ( | ||
result []string | ||
encountered = make(map[string]struct{}) | ||
) | ||
|
||
for match := range matches { | ||
_, ok := encountered[matches[match]] | ||
if ok { | ||
continue | ||
} | ||
encountered[matches[match]] = struct{}{} | ||
result = append(result, matches[match]) | ||
} | ||
|
||
return result | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
package purl_test | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/require" | ||
|
||
"github.com/ocurity/dracon/components/enrichers/reachability/internal/atom/purl" | ||
) | ||
|
||
func TestNewParser(t *testing.T) { | ||
t.Run("should return new parser with valid matchers", func(t *testing.T) { | ||
p, err := purl.NewParser() | ||
require.NoError(t, err) | ||
require.NotNil(t, p) | ||
}) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you are not familiar with the concept, code in
internal
cannot be imported by other Go modules. The build tool reports an error if this happens, leading to a failing build. This ensures that only the API we actually want to expose is available to potential users. This is a nice built-in Go feature.