Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Report Yarn v3/v4 patches as pedigree rather than components #784

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions cachi2/core/models/sbom.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,25 @@ class ExternalReference(pydantic.BaseModel):
type: Literal["distribution"] = "distribution"


class PatchDiff(pydantic.BaseModel):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The CycloneDX schema offers the option of a text diff for the patch, but it is not required. I opted not to include it so as not to clutter the SBOM. Let me know if you disagree.

"""A Diff inside a Patch."""

url: str


class Patch(pydantic.BaseModel):
    """A Patch inside a SBOM Component Pedigree.

    Describes a single patch by its kind and by a reference to its diff.
    """

    # Kind of patch; limited to the four listed values, "unofficial" when not given.
    type: Literal["backport", "cherry-pick", "monkey", "unofficial"] = "unofficial"
    # Required reference to the diff that makes up this patch (see PatchDiff).
    diff: PatchDiff


class Pedigree(pydantic.BaseModel):
    """A Pedigree inside a SBOM component.

    Groups the Patch entries recorded for one component.
    """

    # Required list of patches; there is no default, so callers must pass one
    # (an empty list is accepted by the type).
    patches: list[Patch]


FOUND_BY_CACHI2_PROPERTY: Property = Property(name="cachi2:found_by", value="cachi2")


Expand All @@ -46,6 +65,7 @@ class Component(pydantic.BaseModel):
external_references: Optional[list[ExternalReference]] = pydantic.Field(
serialization_alias="externalReferences", default=None
)
pedigree: Optional[Pedigree] = None

def key(self) -> str:
"""Uniquely identifies a package.
Expand Down
4 changes: 2 additions & 2 deletions cachi2/core/package_managers/yarn/locators.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,8 @@ def _parse_patch_locator(locator: "_ParsedLocator") -> PatchLocator:

# https://github.com/yarnpkg/berry/blob/b6026842dfec4b012571b5982bb74420c7682a73/packages/plugin-patch/sources/patchUtils.ts#L92
def process_patch_path(patch: str) -> Union[str, Path]:
# '~' denotes an optional patch (failing to apply the patch is not fatal, only a warning)
patch = patch.removeprefix("~")
# '~' or 'optional!' denotes an optional patch (failing to apply the patch is not fatal, only a warning)
patch = patch.removeprefix("~").removeprefix("optional!")
if re.match(r"^builtin<([^>]+)>$", patch):
return patch
else:
Expand Down
90 changes: 65 additions & 25 deletions cachi2/core/package_managers/yarn/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from packageurl import PackageURL

from cachi2.core.errors import PackageManagerError, PackageRejected, UnsupportedFeature
from cachi2.core.models.sbom import Component
from cachi2.core.models.sbom import Component, Patch, PatchDiff, Pedigree
from cachi2.core.package_managers.yarn.locators import (
FileLocator,
HttpsLocator,
Expand Down Expand Up @@ -165,8 +165,18 @@ def create_components(
packages: list[Package], project: Project, output_dir: RootedPath
) -> list[Component]:
"""Create SBOM components for all the packages parsed from the 'yarn info' output."""
package_mapping = {package.parsed_locator: package for package in packages}
component_resolver = _ComponentResolver(package_mapping, project, output_dir)
package_mapping: dict[Locator, Package] = {}
patch_locators: list[PatchLocator] = []

# Patches are not components themselves, but they are necessary to
# resolve pedigree for their non-patch parent package components
for package in packages:
if isinstance(package.parsed_locator, PatchLocator):
patch_locators.append(package.parsed_locator)
else:
package_mapping[package.parsed_locator] = package

component_resolver = _ComponentResolver(package_mapping, patch_locators, project, output_dir)
return [component_resolver.get_component(package) for package in package_mapping.values()]


Expand All @@ -192,11 +202,41 @@ class _CouldNotResolve(ValueError):

class _ComponentResolver:
def __init__(
self, package_mapping: Mapping[Locator, Package], project: Project, output_dir: RootedPath
self,
package_mapping: Mapping[Locator, Package],
patch_locators: list[PatchLocator],
project: Project,
output_dir: RootedPath,
) -> None:
self._project = project
self._output_dir = output_dir
self._package_mapping = package_mapping
self._pedigree_mapping = self._get_pedigree_mapping(patch_locators)

def _get_pedigree_mapping(self, patch_locators: list[PatchLocator]) -> dict[Locator, Pedigree]:
"""Map locators for dependencies that get patched to their Pedigree."""
pedigree_mapping: dict[Locator, Pedigree] = {}
for patch_locator in patch_locators:
# Filter out builtin patches
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Builtin patches are applied by Yarn itself and tied to a specific version of Yarn. They are used to make certain features of the package manager work correctly: example.

Do we want to report these?

patch_paths: list[Path] = [p for p in patch_locator.patches if isinstance(p, Path)]
if not patch_paths:
continue

patched_package = self._get_patched_package(patch_locator)
pedigree = pedigree_mapping.setdefault(patched_package, Pedigree(patches=[]))
for patch_path in patch_paths:
patch_url = self._get_patch_url(patch_locator, patch_path)
pedigree.patches.append(Patch(type="unofficial", diff=PatchDiff(url=patch_url)))

return pedigree_mapping

def _get_patched_package(self, patch_locator: PatchLocator) -> Locator:
    """Find the underlying, non-patch locator that a patch locator targets.

    A patch locator may wrap another patch locator; the chain of ``package``
    attributes is followed until a locator that is not a PatchLocator is found.

    :param patch_locator: the patch locator to unwrap
    :return: the first locator in the ``package`` chain that is not a PatchLocator
    """
    current = patch_locator.package
    while True:
        if not isinstance(current, PatchLocator):
            return current
        # Still a patch-of-a-patch: step one level further down the chain.
        current = current.package

def get_component(self, package: Package) -> Component:
"""Create an SBOM component for a yarn Package."""
Expand All @@ -217,6 +257,7 @@ def get_component(self, package: Package) -> Component:
name=resolved_package.name,
version=resolved_package.version,
purl=purl,
pedigree=self._pedigree_mapping.get(package.parsed_locator),
)

@staticmethod
Expand Down Expand Up @@ -262,10 +303,7 @@ def _generate_purl_for_package(package: _ResolvedPackage, project: Project) -> s
subpath = str(normalized.subpath_from_root)

elif isinstance(package.locator, PatchLocator):
# ignore patch locators
# the actual dependency that is patched is reported separately
# the patch itself will be reported via SBOM pedigree patches
pass
raise _CouldNotResolve("Patches cannot be resolved into Components")
else:
assert_never(package.locator)

Expand Down Expand Up @@ -329,23 +367,7 @@ def log_for_locator(msg: str, *args: Any, level: int = logging.DEBUG) -> None:
)
name, version = self._read_name_version_from_packjson(packjson)
elif isinstance(locator, PatchLocator):
if (
package.cache_path
# yarn info seems to always report the cache path for patch dependencies,
# but the path doesn't always exist
and (cache_path := self._cache_path_as_rooted(package.cache_path)).path.exists()
):
log_for_locator("reading package name from %s", cache_path.subpath_from_root)
name = self._read_name_from_cache(cache_path)
elif orig_package := self._package_mapping.get(locator.package):
log_for_locator("resolving the name of the original package")
name = self._resolve_package(orig_package).name
else:
raise _CouldNotResolve(
"the 'yarn info' output does not include either an existing zip archive "
"or the original unpatched package",
)
version = package.version
raise _CouldNotResolve("Patches cannot be resolved into Components")
else:
# This line can never be reached assuming type-checker checks are passing
# https://typing.readthedocs.io/en/latest/source/unreachable.html#assert-never-and-exhaustiveness-checking
Expand Down Expand Up @@ -412,3 +434,21 @@ def _cache_path_as_rooted(self, cache_path: str) -> RootedPath:
return self._project_subpath(cache_path)
else:
return self._output_dir.join_within_root(cache_path)

def _get_patch_url(self, patch_locator: PatchLocator, patch_path: Path) -> str:
    """Build the URL that identifies a patch file within the source repository.

    The result has the form ``<vcs url qualifier>#<subpath from repo root>``.

    :param patch_locator: patch locator whose ``locator`` attribute (the associated
        workspace locator) supplies the relative path the patch path is joined onto
    :param patch_path: path of the patch file, joined after the workspace path
    :return: the repository's VCS URL qualifier followed by ``#`` and the patch subpath
    :raises UnsupportedFeature: if the patch locator has no associated workspace locator
    """
    workspace_locator = patch_locator.locator
    if workspace_locator is None:
        message = (
            f"{patch_locator} is missing an associated workspace locator "
            "and Cachi2 expects all non-builtin yarn patches to have one"
        )
        raise UnsupportedFeature(message)

    source_dir = self._project.source_dir
    # Resolve the patch file relative to the workspace, anchored at the source dir.
    patch_file = source_dir.join_within_root(workspace_locator.relpath, patch_path)
    vcs_url = get_repo_id(source_dir.root).as_vcs_url_qualifier()
    subpath = str(patch_file.subpath_from_root)

    return f"{vcs_url}#{subpath}"
48 changes: 36 additions & 12 deletions tests/integration/test_data/yarn_e2e_test/bom.json
Original file line number Diff line number Diff line change
Expand Up @@ -314,18 +314,16 @@
},
{
"name": "cachito-npm-without-deps",
"properties": [
{
"name": "cachi2:found_by",
"value": "cachi2"
}
],
"purl": "pkg:npm/[email protected]",
"type": "library",
"version": "1.0.0"
},
{
"name": "cachito-npm-without-deps",
"pedigree": {
"patches": [
{
"diff": {
"url": "git+https://github.com/cachito-testing/cachi2-yarn-berry.git@70515793108df42547d3320c7ea4cd6b6e505c46#.yarn/patches/[email protected]"
},
"type": "unofficial"
}
]
},
"properties": [
{
"name": "cachi2:found_by",
Expand Down Expand Up @@ -662,6 +660,16 @@
},
{
"name": "fsevents",
"pedigree": {
"patches": [
{
"diff": {
"url": "git+https://github.com/cachito-testing/cachi2-yarn-berry.git@70515793108df42547d3320c7ea4cd6b6e505c46#my-patches/fsevents.patch"
},
"type": "unofficial"
}
]
},
"properties": [
{
"name": "cachi2:found_by",
Expand Down Expand Up @@ -961,6 +969,22 @@
},
{
"name": "left-pad",
"pedigree": {
"patches": [
{
"diff": {
"url": "git+https://github.com/cachito-testing/cachi2-yarn-berry.git@70515793108df42547d3320c7ea4cd6b6e505c46#my-patches/left-pad.patch"
},
"type": "unofficial"
},
{
"diff": {
"url": "git+https://github.com/cachito-testing/cachi2-yarn-berry.git@70515793108df42547d3320c7ea4cd6b6e505c46#my-patches/left-pad-2.patch"
},
"type": "unofficial"
}
]
},
"properties": [
{
"name": "cachi2:found_by",
Expand Down
48 changes: 36 additions & 12 deletions tests/integration/test_data/yarn_v4/bom.json
Original file line number Diff line number Diff line change
Expand Up @@ -314,18 +314,16 @@
},
{
"name": "cachito-npm-without-deps",
"properties": [
{
"name": "cachi2:found_by",
"value": "cachi2"
}
],
"purl": "pkg:npm/[email protected]",
"type": "library",
"version": "1.0.0"
},
{
"name": "cachito-npm-without-deps",
"pedigree": {
"patches": [
{
"diff": {
"url": "git+https://github.com/cachito-testing/cachi2-yarn-berry.git@53a2bfe8d5ee7ed9c2f752fe75831a881d54895f#.yarn/patches/[email protected]"
},
"type": "unofficial"
}
]
},
"properties": [
{
"name": "cachi2:found_by",
Expand Down Expand Up @@ -662,6 +660,16 @@
},
{
"name": "fsevents",
"pedigree": {
"patches": [
{
"diff": {
"url": "git+https://github.com/cachito-testing/cachi2-yarn-berry.git@53a2bfe8d5ee7ed9c2f752fe75831a881d54895f#my-patches/fsevents.patch"
},
"type": "unofficial"
}
]
},
"properties": [
{
"name": "cachi2:found_by",
Expand Down Expand Up @@ -961,6 +969,22 @@
},
{
"name": "left-pad",
"pedigree": {
"patches": [
{
"diff": {
"url": "git+https://github.com/cachito-testing/cachi2-yarn-berry.git@53a2bfe8d5ee7ed9c2f752fe75831a881d54895f#my-patches/left-pad.patch"
},
"type": "unofficial"
},
{
"diff": {
"url": "git+https://github.com/cachito-testing/cachi2-yarn-berry.git@53a2bfe8d5ee7ed9c2f752fe75831a881d54895f#my-patches/left-pad-2.patch"
},
"type": "unofficial"
}
]
},
"properties": [
{
"name": "cachi2:found_by",
Expand Down
5 changes: 2 additions & 3 deletions tests/unit/package_managers/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,6 @@ def test_resolve_generic_lockfile_invalid(
"properties": [{"name": "cachi2:found_by", "value": "cachi2"}],
"purl": "pkg:generic/archive.zip?checksum=md5:3a18656e1cea70504b905836dee14db0&download_url=https://example.com/artifact",
"type": "file",
"version": None,
},
{
"external_references": [
Expand All @@ -268,7 +267,6 @@ def test_resolve_generic_lockfile_invalid(
"properties": [{"name": "cachi2:found_by", "value": "cachi2"}],
"purl": "pkg:generic/file.tar.gz?checksum=md5:32112bed1914cfe3799600f962750b1d&download_url=https://example.com/more/complex/path/file.tar.gz%3Ffoo%3Dbar%23fragment",
"type": "file",
"version": None,
},
],
id="valid_lockfile",
Expand Down Expand Up @@ -324,7 +322,8 @@ def test_resolve_generic_lockfile_valid(
f.write(lockfile_content)

assert [
c.model_dump() for c in _resolve_generic_lockfile(lockfile_path.path, rooted_tmp_path)
c.model_dump(exclude_none=True)
for c in _resolve_generic_lockfile(lockfile_path.path, rooted_tmp_path)
] == expected_components
mock_checksums.assert_called()

Expand Down
Loading
Loading