diff --git a/components/enrichers/reachability/internal/atom/purl/purl.go b/components/enrichers/reachability/internal/atom/purl/purl.go index 1d0beb1f1..f0581b4ee 100644 --- a/components/enrichers/reachability/internal/atom/purl/purl.go +++ b/components/enrichers/reachability/internal/atom/purl/purl.go @@ -2,142 +2,72 @@ package purl import ( "fmt" + "path" "regexp" - "strings" + + "github.com/package-url/packageurl-go" ) // Parser allows to extract information from purls - https://github.com/package-url/purl-spec. type Parser struct { - matcherPurlPkg *regexp.Regexp - matcherPurlTrailingVersion *regexp.Regexp - matcherPurlVersion *regexp.Regexp + semverPattern *regexp.Regexp + shaCommitPattern *regexp.Regexp } +// NewParser returns a new parser. func NewParser() (*Parser, error) { - purlPkg, err := regexp.Compile(`(?P[^/:]+/(?P[^/]+))(?:(?:.|/)v\d+)?@`) - if err != nil { - return nil, fmt.Errorf("failed to compile purl pkg regex: %w", err) - } - purlTrailingVersion, err := regexp.Compile(`[./]v\d+@`) + // Matches SEMVER versions: v1.1.0 / v1.1.0-beta. + semverPattern, err := regexp.Compile(`^v?(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(-[0-9A-Za-z\-\.]+)?(\+[0-9A-Za-z\-\.]+)?$`) if err != nil { - return nil, fmt.Errorf("failed to compile purl trailing version regex: %w", err) + return nil, fmt.Errorf("failed to compile purl semver regex: %w", err) } - purlVersion, err := regexp.Compile(`@(?Pv?(?P[\d.]+){1,3})(?P[^?\s]+)?`) + // Matches SHA commit hashes from 7 (short) to 40 characters. + shaCommitPattern, err := regexp.Compile(`^[a-fA-F0-9]{7,40}$`) if err != nil { - return nil, fmt.Errorf("failed to compile purl version regex: %w", err) + return nil, fmt.Errorf("failed to compile sha commit pattern regex: %w", err) } return &Parser{ - matcherPurlPkg: purlPkg, - matcherPurlTrailingVersion: purlTrailingVersion, - matcherPurlVersion: purlVersion, + semverPattern: semverPattern, + shaCommitPattern: shaCommitPattern, }, nil } -// ParsePurl extracts pkg:version matches from the supplied purl. +// ParsePurl extracts namespace:name:version sub-parts from purls, based on the type of versioning used (SHA, SEMVER). func (p *Parser) ParsePurl(purl string) ([]string, error) { - purl = p.matcherPurlTrailingVersion.ReplaceAllString(purl, "$1@") - - var ( - result []string - pkgs []string - versions []string - ) - - if match := p.matcherPurlVersion.FindStringSubmatch(purl); len(match) > 0 { - versions = p.parsePurlVersions(match) - } - - if match := p.matcherPurlPkg.FindStringSubmatch(purl); len(match) > 0 { - pkgs = p.parsePurlPkgs(match) - } - - for _, pkg := range pkgs { - for _, version := range versions { - result = append(result, fmt.Sprintf("%s:%s", pkg, version)) - } - } - - return p.removeDuplicates(result), nil -} - -func (p *Parser) parsePurlVersions(matches []string) []string { - if len(matches) == 0 { - return make([]string, 0) - } - - var ( - pattern = p.matcherPurlVersion - versions []string - // Creating a map to ensure uniqueness - versionSet = make(map[string]struct{}) - - // Assuming the named groups are in the match - vers1 = matches[pattern.SubexpIndex("v1")] - vers2 = matches[pattern.SubexpIndex("v2")] - ext = matches[pattern.SubexpIndex("ext")] - ) - - // Adding the basic versions - versionSet[vers1] = struct{}{} - versionSet[vers2] = struct{}{} - - // Adding the extended versions if ext exists - if ext != "" { - versionSet[vers1+ext] = struct{}{} - versionSet[vers2+ext] = struct{}{} - } - - // Converting the map to a slice - for version := range versionSet { - versions = append(versions, version) + pp, err := packageurl.FromString(purl) + if err != nil { + return nil, fmt.Errorf("failed to parse purl: %w", err) } - return versions -} - -func (p *Parser) parsePurlPkgs(matches []string) []string { - var ( - pattern = p.matcherPurlPkg - // Creating a map to ensure uniqueness - pkgSet = make(map[string]struct{}) - pkgs []string - pkgStrReplacer = strings.NewReplacer( - // replaces "pypi/" with "". - "pypi/", "", - // replaces "npm/" with "". - "npm/", "", - // replaces "%40/" with "@". - "%40", "@", - ) - ) - - // Adding the packages - pkgSet[matches[pattern.SubexpIndex("p1")]] = struct{}{} - pkgSet[matches[pattern.SubexpIndex("p2")]] = struct{}{} - - // Converting the map to a slice and cleaning up the packages - for pkg := range pkgSet { - pkgs = append(pkgs, pkgStrReplacer.Replace(pkg)) + if pp.Version == "" { + return nil, fmt.Errorf("failed to parse purl: empty version") } - return pkgs -} - -func (p *Parser) removeDuplicates(matches []string) []string { var ( - result []string - encountered = make(map[string]struct{}) + namespace = pp.Namespace + name = pp.Name + version = pp.Version + shortVersion string + purlParts = []string{ + path.Join(namespace, name) + ":" + version, + name + ":" + version, + } ) - for match := range matches { - _, ok := encountered[matches[match]] - if ok { - continue - } - encountered[matches[match]] = struct{}{} - result = append(result, matches[match]) + switch { + case p.semverPattern.MatchString(version): + return purlParts, nil + case p.shaCommitPattern.MatchString(version): + // Short commit SHA. + shortVersion = version[:7] + purlParts = append(purlParts, []string{ + path.Join(namespace, name) + ":" + shortVersion, + name + ":" + shortVersion, + }...) + default: + return nil, fmt.Errorf("failed to parse purl, invalid version: %s", version) } - return result + return purlParts, nil } diff --git a/components/enrichers/reachability/internal/atom/purl/purl_test.go b/components/enrichers/reachability/internal/atom/purl/purl_test.go index e8c08225d..d58e3ccf5 100644 --- a/components/enrichers/reachability/internal/atom/purl/purl_test.go +++ b/components/enrichers/reachability/internal/atom/purl/purl_test.go @@ -1,17 +1,88 @@ package purl_test import ( + "fmt" "testing" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/ocurity/dracon/components/enrichers/reachability/internal/atom/purl" ) -func TestNewParser(t *testing.T) { - t.Run("should return new parser with valid matchers", func(t *testing.T) { - p, err := purl.NewParser() - require.NoError(t, err) - require.NotNil(t, p) - }) +func TestParser_ParsePurl(t *testing.T) { + p, err := purl.NewParser() + require.NoError(t, err) + + for _, tt := range []struct { + inputPurl string + expectedMatches []string + expectedError bool + }{ + { + inputPurl: "hey", + expectedError: true, + }, + { + inputPurl: "pkg:bitbucket/birkenfeld/pygments-main", + expectedError: true, + }, + { + inputPurl: "pkg:bitbucket/birkenfeld/pygments-main@v1", + expectedError: true, + }, + { + inputPurl: "pkg:bitbucket/birkenfeld/pygments-main@v1.1", + expectedError: true, + }, + { + inputPurl: "pkg:bitbucket/birkenfeld/pygments-main@244", + expectedError: true, + }, + { + inputPurl: "pkg:bitbucket/birkenfeld/pygments-main@244fd47e07d1014f0aed9c", + expectedMatches: []string{ + "birkenfeld/pygments-main:244fd47e07d1014f0aed9c", + "pygments-main:244fd47e07d1014f0aed9c", + "birkenfeld/pygments-main:244fd47", + "pygments-main:244fd47", + }, + }, + { + inputPurl: "pkg:deb/debian/curl@7.50.3-1?arch=i386&distro=jessie", + expectedMatches: []string{ + "debian/curl:7.50.3-1", + "curl:7.50.3-1", + }, + }, + { + inputPurl: "pkg:github/package-url/purl-spec@244fd47e07d1004f0aed9c", + expectedMatches: []string{ + "package-url/purl-spec:244fd47e07d1004f0aed9c", + "purl-spec:244fd47e07d1004f0aed9c", + "package-url/purl-spec:244fd47", + "purl-spec:244fd47", + }, + }, + { + inputPurl: "pkg:github/package-url/purl-spec@v1.2.3-beta", + expectedMatches: []string{ + "package-url/purl-spec:v1.2.3-beta", + "purl-spec:v1.2.3-beta", + }, + }, + } { + t.Run( + fmt.Sprintf("parsing with input %s should succeed: %v", tt.inputPurl, !tt.expectedError), + func(t *testing.T) { + pp, err := p.ParsePurl(tt.inputPurl) + if tt.expectedError { + require.Error(t, err) + assert.Nil(t, pp) + return + } + require.NoError(t, err) + assert.Equal(t, tt.expectedMatches, pp) + }) + } } diff --git a/components/enrichers/reachability/internal/atom/reader.go b/components/enrichers/reachability/internal/atom/reader.go index 2bc4996e3..b7fda9b68 100644 --- a/components/enrichers/reachability/internal/atom/reader.go +++ b/components/enrichers/reachability/internal/atom/reader.go @@ -9,8 +9,6 @@ import ( "os" "strings" - "github.com/jmespath/go-jmespath" - "github.com/ocurity/dracon/components/enrichers/reachability/internal/atom/purl" "github.com/ocurity/dracon/components/enrichers/reachability/internal/logging" ) @@ -94,49 +92,29 @@ func (r *Reader) Read(ctx context.Context) (*Response, error) { // ReachablePurls finds all the reachable purls presents in the atom reachability result. func (r *Reader) ReachablePurls(ctx context.Context, reachables *Response) (ReachablePurls, error) { - logger := logging.FromContext(ctx) - - rawPurls, err := jmespath.Search("reachables[].purls[]", reachables) - if err != nil { - return nil, fmt.Errorf("failed to search reachable purls: %w", err) - } - - purls, ok := rawPurls.([]any) - if !ok { - logger.Error( - "invalid raw reachable purl. Expected an array", - slog.Any("raw_purls", rawPurls), - ) - return nil, errors.New("invalid raw reachable purl. Expected an array") - } - - uniquePurls := make(map[string]struct{}) - for idx, p := range purls { - ps, ok := p.(string) - if !ok { - logger.Error( - "unexpected purl type, expected a string. Continuing...", - slog.Any("purl", p), - slog.Int("index", idx), - ) - continue - } - uniquePurls[ps] = struct{}{} - } - - finalPurls := make(ReachablePurls) - for p := range uniquePurls { - parsedPurls, err := r.purlParser.ParsePurl(p) - if err != nil { - logger.Error( - "could not parse purl. Continuing...", - slog.Any("purl", p), - ) - continue - } - - for _, pp := range parsedPurls { - finalPurls[pp] = struct{}{} + var ( + logger = logging.FromContext(ctx) + uniquePurls = make(map[string]struct{}) + finalPurls = make(ReachablePurls) + ) + + for _, reachable := range reachables.Reachables { + for _, p := range reachable.Purls { + if _, ok := uniquePurls[p]; !ok { + uniquePurls[p] = struct{}{} + parsedPurls, err := r.purlParser.ParsePurl(p) + if err != nil { + logger.Error( + "could not parse purl. Continuing...", + slog.Any("purl", p), + ) + continue + } + + for _, pp := range parsedPurls { + finalPurls[pp] = struct{}{} + } + } } }