Skip to content

Commit

Permalink
Simplifying purl parsing and reachability flow for atom reachability …
Browse files Browse the repository at this point in the history
…enricher.
  • Loading branch information
andream16 authored and ptzianos committed Sep 21, 2024
1 parent 6e66146 commit 65504c2
Show file tree
Hide file tree
Showing 3 changed files with 141 additions and 162 deletions.
152 changes: 41 additions & 111 deletions components/enrichers/reachability/internal/atom/purl/purl.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,142 +2,72 @@ package purl

import (
"fmt"
"path"
"regexp"
"strings"

"github.com/package-url/packageurl-go"
)

// Parser allows to extract information from purls - https://github.com/package-url/purl-spec.
type Parser struct {
matcherPurlPkg *regexp.Regexp
matcherPurlTrailingVersion *regexp.Regexp
matcherPurlVersion *regexp.Regexp
semverPattern *regexp.Regexp
shaCommitPattern *regexp.Regexp
}

// NewParser returns a new parser.
func NewParser() (*Parser, error) {
purlPkg, err := regexp.Compile(`(?P<p1>[^/:]+/(?P<p2>[^/]+))(?:(?:.|/)v\d+)?@`)
if err != nil {
return nil, fmt.Errorf("failed to compile purl pkg regex: %w", err)
}
purlTrailingVersion, err := regexp.Compile(`[./]v\d+@`)
// Matches SEMVER versions: v1.1.0 / v1.1.0-beta.
semverPattern, err := regexp.Compile(`^v?(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(-[0-9A-Za-z\-\.]+)?(\+[0-9A-Za-z\-\.]+)?$`)
if err != nil {
return nil, fmt.Errorf("failed to compile purl trailing version regex: %w", err)
return nil, fmt.Errorf("failed to compile purl semver regex: %w", err)
}
purlVersion, err := regexp.Compile(`@(?P<v1>v?(?P<v2>[\d.]+){1,3})(?P<ext>[^?\s]+)?`)
// Matches SHA commit hashes from 7 (short) to 40 characters.
shaCommitPattern, err := regexp.Compile(`^[a-fA-F0-9]{7,40}$`)
if err != nil {
return nil, fmt.Errorf("failed to compile purl version regex: %w", err)
return nil, fmt.Errorf("failed to compile sha commit pattern regex: %w", err)
}

return &Parser{
matcherPurlPkg: purlPkg,
matcherPurlTrailingVersion: purlTrailingVersion,
matcherPurlVersion: purlVersion,
semverPattern: semverPattern,
shaCommitPattern: shaCommitPattern,
}, nil
}

// ParsePurl extracts pkg:version matches from the supplied purl.
// ParsePurl extracts namespace:name:version sub-parts from purls, based on the type of versioning used (SHA, SEMVER).
func (p *Parser) ParsePurl(purl string) ([]string, error) {
purl = p.matcherPurlTrailingVersion.ReplaceAllString(purl, "$1@")

var (
result []string
pkgs []string
versions []string
)

if match := p.matcherPurlVersion.FindStringSubmatch(purl); len(match) > 0 {
versions = p.parsePurlVersions(match)
}

if match := p.matcherPurlPkg.FindStringSubmatch(purl); len(match) > 0 {
pkgs = p.parsePurlPkgs(match)
}

for _, pkg := range pkgs {
for _, version := range versions {
result = append(result, fmt.Sprintf("%s:%s", pkg, version))
}
}

return p.removeDuplicates(result), nil
}

func (p *Parser) parsePurlVersions(matches []string) []string {
if len(matches) == 0 {
return make([]string, 0)
}

var (
pattern = p.matcherPurlVersion
versions []string
// Creating a map to ensure uniqueness
versionSet = make(map[string]struct{})

// Assuming the named groups are in the match
vers1 = matches[pattern.SubexpIndex("v1")]
vers2 = matches[pattern.SubexpIndex("v2")]
ext = matches[pattern.SubexpIndex("ext")]
)

// Adding the basic versions
versionSet[vers1] = struct{}{}
versionSet[vers2] = struct{}{}

// Adding the extended versions if ext exists
if ext != "" {
versionSet[vers1+ext] = struct{}{}
versionSet[vers2+ext] = struct{}{}
}

// Converting the map to a slice
for version := range versionSet {
versions = append(versions, version)
pp, err := packageurl.FromString(purl)
if err != nil {
return nil, fmt.Errorf("failed to parse purl: %w", err)
}

return versions
}

func (p *Parser) parsePurlPkgs(matches []string) []string {
var (
pattern = p.matcherPurlPkg
// Creating a map to ensure uniqueness
pkgSet = make(map[string]struct{})
pkgs []string
pkgStrReplacer = strings.NewReplacer(
// replaces "pypi/" with "".
"pypi/", "",
// replaces "npm/" with "".
"npm/", "",
// replaces "%40/" with "@".
"%40", "@",
)
)

// Adding the packages
pkgSet[matches[pattern.SubexpIndex("p1")]] = struct{}{}
pkgSet[matches[pattern.SubexpIndex("p2")]] = struct{}{}

// Converting the map to a slice and cleaning up the packages
for pkg := range pkgSet {
pkgs = append(pkgs, pkgStrReplacer.Replace(pkg))
if pp.Version == "" {
return nil, fmt.Errorf("failed to parse purl: empty version")
}

return pkgs
}

func (p *Parser) removeDuplicates(matches []string) []string {
var (
result []string
encountered = make(map[string]struct{})
namespace = pp.Namespace
name = pp.Name
version = pp.Version
shortVersion string
purlParts = []string{
path.Join(namespace, name) + ":" + version,
name + ":" + version,
}
)

for match := range matches {
_, ok := encountered[matches[match]]
if ok {
continue
}
encountered[matches[match]] = struct{}{}
result = append(result, matches[match])
switch {
case p.semverPattern.MatchString(version):
return purlParts, nil
case p.shaCommitPattern.MatchString(version):
// Short commit SHA.
shortVersion = version[:7]
purlParts = append(purlParts, []string{
path.Join(namespace, name) + ":" + shortVersion,
name + ":" + shortVersion,
}...)
default:
return nil, fmt.Errorf("failed to parse purl, invalid version: %s", version)
}

return result
return purlParts, nil
}
83 changes: 77 additions & 6 deletions components/enrichers/reachability/internal/atom/purl/purl_test.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,88 @@
package purl_test

import (
"fmt"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/ocurity/dracon/components/enrichers/reachability/internal/atom/purl"
)

func TestNewParser(t *testing.T) {
t.Run("should return new parser with valid matchers", func(t *testing.T) {
p, err := purl.NewParser()
require.NoError(t, err)
require.NotNil(t, p)
})
// TestParser_ParsePurl is a table-driven test for Parser.ParsePurl.
//
// Every case feeds one purl string to the parser and checks either that an
// error is returned together with a nil result, or that the returned slice is
// exactly expectedMatches (order included).
func TestParser_ParsePurl(t *testing.T) {
	p, err := purl.NewParser()
	require.NoError(t, err)

	for _, tt := range []struct {
		// inputPurl is the raw purl handed to ParsePurl.
		inputPurl string
		// expectedMatches is the exact result expected on success.
		expectedMatches []string
		// expectedError marks inputs that must be rejected.
		expectedError bool
	}{
		{
			// Not a purl at all.
			inputPurl:     "hey",
			expectedError: true,
		},
		{
			// No version component.
			inputPurl:     "pkg:bitbucket/birkenfeld/pygments-main",
			expectedError: true,
		},
		{
			// Single-component version: neither a full semver nor a commit SHA.
			inputPurl:     "pkg:bitbucket/birkenfeld/pygments-main@v1",
			expectedError: true,
		},
		{
			// NOTE(review): this literal was mangled by e-mail obfuscation in the
			// copy this was restored from; a two-component version is assumed
			// here — confirm against the repository.
			inputPurl:     "pkg:bitbucket/birkenfeld/pygments-main@v1.1",
			expectedError: true,
		},
		{
			// Hex digits only, but shorter than the 7 characters of a short SHA.
			inputPurl:     "pkg:bitbucket/birkenfeld/pygments-main@244",
			expectedError: true,
		},
		{
			// Commit SHA: full and 7-character forms, with and without namespace.
			inputPurl: "pkg:bitbucket/birkenfeld/pygments-main@244fd47e07d1014f0aed9c",
			expectedMatches: []string{
				"birkenfeld/pygments-main:244fd47e07d1014f0aed9c",
				"pygments-main:244fd47e07d1014f0aed9c",
				"birkenfeld/pygments-main:244fd47",
				"pygments-main:244fd47",
			},
		},
		{
			// Semver with a pre-release suffix; qualifiers must not leak into the version.
			inputPurl: "pkg:deb/debian/curl@7.50.3-1?arch=i386&distro=jessie",
			expectedMatches: []string{
				"debian/curl:7.50.3-1",
				"curl:7.50.3-1",
			},
		},
		{
			inputPurl: "pkg:github/package-url/purl-spec@244fd47e07d1004f0aed9c",
			expectedMatches: []string{
				"package-url/purl-spec:244fd47e07d1004f0aed9c",
				"purl-spec:244fd47e07d1004f0aed9c",
				"package-url/purl-spec:244fd47",
				"purl-spec:244fd47",
			},
		},
		{
			// "v"-prefixed semver with a pre-release suffix.
			inputPurl: "pkg:github/package-url/purl-spec@v1.2.3-beta",
			expectedMatches: []string{
				"package-url/purl-spec:v1.2.3-beta",
				"purl-spec:v1.2.3-beta",
			},
		},
	} {
		t.Run(
			fmt.Sprintf("parsing with input %s should succeed: %v", tt.inputPurl, !tt.expectedError),
			func(t *testing.T) {
				pp, err := p.ParsePurl(tt.inputPurl)
				if tt.expectedError {
					require.Error(t, err)
					assert.Nil(t, pp)
					return
				}
				require.NoError(t, err)
				assert.Equal(t, tt.expectedMatches, pp)
			})
	}
}
68 changes: 23 additions & 45 deletions components/enrichers/reachability/internal/atom/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ import (
"os"
"strings"

"github.com/jmespath/go-jmespath"

"github.com/ocurity/dracon/components/enrichers/reachability/internal/atom/purl"
"github.com/ocurity/dracon/components/enrichers/reachability/internal/logging"
)
Expand Down Expand Up @@ -94,49 +92,29 @@ func (r *Reader) Read(ctx context.Context) (*Response, error) {

// ReachablePurls finds all the reachable purls presents in the atom reachability result.
func (r *Reader) ReachablePurls(ctx context.Context, reachables *Response) (ReachablePurls, error) {
logger := logging.FromContext(ctx)

rawPurls, err := jmespath.Search("reachables[].purls[]", reachables)
if err != nil {
return nil, fmt.Errorf("failed to search reachable purls: %w", err)
}

purls, ok := rawPurls.([]any)
if !ok {
logger.Error(
"invalid raw reachable purl. Expected an array",
slog.Any("raw_purls", rawPurls),
)
return nil, errors.New("invalid raw reachable purl. Expected an array")
}

uniquePurls := make(map[string]struct{})
for idx, p := range purls {
ps, ok := p.(string)
if !ok {
logger.Error(
"unexpected purl type, expected a string. Continuing...",
slog.Any("purl", p),
slog.Int("index", idx),
)
continue
}
uniquePurls[ps] = struct{}{}
}

finalPurls := make(ReachablePurls)
for p := range uniquePurls {
parsedPurls, err := r.purlParser.ParsePurl(p)
if err != nil {
logger.Error(
"could not parse purl. Continuing...",
slog.Any("purl", p),
)
continue
}

for _, pp := range parsedPurls {
finalPurls[pp] = struct{}{}
var (
logger = logging.FromContext(ctx)
uniquePurls = make(map[string]struct{})
finalPurls = make(ReachablePurls)
)

for _, reachable := range reachables.Reachables {
for _, p := range reachable.Purls {
if _, ok := uniquePurls[p]; !ok {
uniquePurls[p] = struct{}{}
parsedPurls, err := r.purlParser.ParsePurl(p)
if err != nil {
logger.Error(
"could not parse purl. Continuing...",
slog.Any("purl", p),
)
continue
}

for _, pp := range parsedPurls {
finalPurls[pp] = struct{}{}
}
}
}
}

Expand Down

0 comments on commit 65504c2

Please sign in to comment.