Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Report Yarn v3/v4 patches as pedigree rather than components #784

Merged
merged 4 commits into from
Jan 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions cachi2/core/models/sbom.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,25 @@ class ExternalReference(pydantic.BaseModel):
type: Literal["distribution"] = "distribution"


class PatchDiff(pydantic.BaseModel):
    """The diff entry carried by a single Patch."""

    # Location of the patch file; the resolver fills this with a VCS-style URL.
    url: str


class Patch(pydantic.BaseModel):
    """One patch entry listed in the Pedigree of an SBOM component."""

    # Classification of the patch; "unofficial" is used when nothing else is given.
    type: Literal[
        "backport",
        "cherry-pick",
        "monkey",
        "unofficial",
    ] = "unofficial"
    # Reference to the diff that this patch applies.
    diff: PatchDiff


class Pedigree(pydantic.BaseModel):
    """Pedigree of an SBOM component: the patches applied on top of it."""

    # Every patch recorded for the component, in the order they were collected.
    patches: list[Patch]


# SBOM property (name "cachi2:found_by", value "cachi2") used to tag components reported by cachi2.
FOUND_BY_CACHI2_PROPERTY: Property = Property(name="cachi2:found_by", value="cachi2")


Expand All @@ -45,6 +64,7 @@ class Component(pydantic.BaseModel):
external_references: Optional[list[ExternalReference]] = pydantic.Field(
serialization_alias="externalReferences", default=None
)
pedigree: Optional[Pedigree] = None

def key(self) -> str:
"""Uniquely identifies a package.
Expand Down
8 changes: 6 additions & 2 deletions cachi2/core/package_managers/yarn/locators.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 +205,14 @@ def _parse_patch_locator(locator: "_ParsedLocator") -> PatchLocator:

original_package = parse_locator(reference.source)

# https://github.com/yarnpkg/berry/blob/b6026842dfec4b012571b5982bb74420c7682a73/packages/plugin-patch/sources/patchUtils.ts#L92
def process_patch_path(patch: str) -> Union[str, Path]:
# '~' denotes an optional patch (failing to apply the patch is not fatal, only a warning)
# Yarn patches can be optional, where failing to apply the patch is not fatal, only a warning
# '~' denotes an optional patch in Yarn v3
# https://github.com/yarnpkg/berry/blob/b6026842dfec4b012571b5982bb74420c7682a73/packages/plugin-patch/sources/patchUtils.ts#L92
patch = patch.removeprefix("~")
# `optional!` denotes an optional patch in Yarn v4
# https://github.com/yarnpkg/berry/blob/93a56643ba3c813a87920dcf75c644eaf3b38e6f/packages/plugin-patch/sources/patchUtils.ts#L147
patch = patch.removeprefix("optional!")
if re.match(r"^builtin<([^>]+)>$", patch):
return patch
else:
Expand Down
122 changes: 96 additions & 26 deletions cachi2/core/package_managers/yarn/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,22 @@

import json
import logging
import re
import zipfile
from collections import defaultdict
from dataclasses import dataclass
from functools import cached_property
from pathlib import Path
from textwrap import dedent
from typing import TYPE_CHECKING, Any, Mapping, Union
from urllib.parse import quote

import pydantic
from packageurl import PackageURL
from semver import Version

from cachi2.core.errors import PackageManagerError, PackageRejected, UnsupportedFeature
from cachi2.core.models.sbom import Component
eskultety marked this conversation as resolved.
Show resolved Hide resolved
from cachi2.core.models.sbom import Component, Patch, PatchDiff, Pedigree
from cachi2.core.package_managers.yarn.locators import (
FileLocator,
HttpsLocator,
Expand All @@ -31,7 +35,7 @@
parse_locator,
)
from cachi2.core.package_managers.yarn.project import Optional, Project
from cachi2.core.package_managers.yarn.utils import run_yarn_cmd
from cachi2.core.package_managers.yarn.utils import extract_yarn_version_from_env, run_yarn_cmd
from cachi2.core.rooted_path import RootedPath
from cachi2.core.scm import get_repo_id

Expand All @@ -43,6 +47,10 @@

log = logging.getLogger(__name__)

# Directory, relative to the root of the Yarn repository, that holds the plugin-compat patches.
COMPAT_PATCHES_SUBPATH = "packages/plugin-compat/sources/patches"
# Recognizes a builtin patch reference of the form `builtin<compat/NAME>` and captures NAME.
COMPAT_PATCHES_REGEX = re.compile(r"builtin<compat/([^>]+)>")
# Repository that URLs for builtin patches point to.
YARN_REPO_URL = "https://github.com/yarnpkg/berry"


@dataclass(frozen=True)
class Package:
Expand Down Expand Up @@ -165,8 +173,18 @@ def create_components(
packages: list[Package], project: Project, output_dir: RootedPath
) -> list[Component]:
"""Create SBOM components for all the packages parsed from the 'yarn info' output."""
package_mapping = {package.parsed_locator: package for package in packages}
component_resolver = _ComponentResolver(package_mapping, project, output_dir)
package_mapping: dict[Locator, Package] = {}
patch_locators: list[PatchLocator] = []

# Patches are not components themselves, but they are necessary to
# resolve pedigree for their non-patch parent package components
for package in packages:
if isinstance(package.parsed_locator, PatchLocator):
patch_locators.append(package.parsed_locator)
else:
package_mapping[package.parsed_locator] = package

component_resolver = _ComponentResolver(package_mapping, patch_locators, project, output_dir)
return [component_resolver.get_component(package) for package in package_mapping.values()]


Expand All @@ -192,11 +210,51 @@ class _CouldNotResolve(ValueError):

class _ComponentResolver:
def __init__(
self, package_mapping: Mapping[Locator, Package], project: Project, output_dir: RootedPath
self,
package_mapping: Mapping[Locator, Package],
patch_locators: list[PatchLocator],
project: Project,
output_dir: RootedPath,
) -> None:
self._project = project
self._output_dir = output_dir
self._package_mapping = package_mapping
self._pedigree_mapping = self._get_pedigree_mapping(patch_locators)

def _get_pedigree_mapping(self, patch_locators: list[PatchLocator]) -> dict[Locator, Pedigree]:
    """Map locators for dependencies that get patched to their Pedigree.

    :param patch_locators: patch locators parsed from the 'yarn info' output
    :return: a plain dict keyed by the non-patch parent locator; each Pedigree
        lists one Patch per entry in the patch locators that target that parent.
        Empty when there are no patch locators.
    """
    # Nothing to resolve: return early so the Yarn version is only looked up
    # when it is needed, and is always bound before the loop below reads it.
    if not patch_locators:
        return {}

    # Builtin patches are included with the version of yarn being used
    yarn_version = extract_yarn_version_from_env(self._project.source_dir)

    pedigree_mapping: defaultdict[Locator, Pedigree] = defaultdict(lambda: Pedigree(patches=[]))

    for patch_locator in patch_locators:
        # Patches can patch other patches, so find the true parent Component
        patched_package = self._get_patched_package(patch_locator)
        pedigree = pedigree_mapping[patched_package]

        for patch in patch_locator.patches:
            patch_url = self._get_patch_url(patch_locator, patch, yarn_version)
            pedigree.patches.append(Patch(type="unofficial", diff=PatchDiff(url=patch_url)))

    # Hand back a regular dict so later lookups of unpatched locators do not create entries
    return dict(pedigree_mapping)

def _get_patch_url(
    self, patch_locator: PatchLocator, patch: Union[Path, str], yarn_version: Version
) -> str:
    """Return the URL recorded for one patch of a patch locator.

    A patch given as a Path is resolved through the path-based helper;
    any other value is handed to the builtin-patch helper together with
    the Yarn version.
    """
    if not isinstance(patch, Path):
        # Not a filesystem path, so treat it as a builtin patch reference.
        return self._get_builtin_patch_url(patch, yarn_version)

    return self._get_path_patch_url(patch_locator, patch)

def _get_patched_package(self, patch_locator: PatchLocator) -> Locator:
    """Walk up a chain of patch locators and return the first non-patch locator."""
    candidate = patch_locator.package
    while True:
        if not isinstance(candidate, PatchLocator):
            # Reached the package that the whole chain of patches applies to.
            return candidate
        candidate = candidate.package

def get_component(self, package: Package) -> Component:
"""Create an SBOM component for a yarn Package."""
Expand All @@ -217,6 +275,7 @@ def get_component(self, package: Package) -> Component:
name=resolved_package.name,
version=resolved_package.version,
purl=purl,
pedigree=self._pedigree_mapping.get(package.parsed_locator),
)

@staticmethod
Expand Down Expand Up @@ -262,10 +321,7 @@ def _generate_purl_for_package(package: _ResolvedPackage, project: Project) -> s
subpath = str(normalized.subpath_from_root)

elif isinstance(package.locator, PatchLocator):
# ignore patch locators
# the actual dependency that is patched is reported separately
# the patch itself will be reported via SBOM pedigree patches
pass
raise _CouldNotResolve("Patches cannot be resolved into Components")
else:
assert_never(package.locator)

Expand Down Expand Up @@ -329,23 +385,7 @@ def log_for_locator(msg: str, *args: Any, level: int = logging.DEBUG) -> None:
)
name, version = self._read_name_version_from_packjson(packjson)
elif isinstance(locator, PatchLocator):
if (
package.cache_path
# yarn info seems to always report the cache path for patch dependencies,
# but the path doesn't always exist
and (cache_path := self._cache_path_as_rooted(package.cache_path)).path.exists()
):
log_for_locator("reading package name from %s", cache_path.subpath_from_root)
name = self._read_name_from_cache(cache_path)
elif orig_package := self._package_mapping.get(locator.package):
log_for_locator("resolving the name of the original package")
name = self._resolve_package(orig_package).name
else:
raise _CouldNotResolve(
"the 'yarn info' output does not include either an existing zip archive "
"or the original unpatched package",
)
version = package.version
raise _CouldNotResolve("Patches cannot be resolved into Components")
else:
# This line can never be reached assuming type-checker checks are passing
# https://typing.readthedocs.io/en/latest/source/unreachable.html#assert-never-and-exhaustiveness-checking
Expand Down Expand Up @@ -412,3 +452,33 @@ def _cache_path_as_rooted(self, cache_path: str) -> RootedPath:
return self._project_subpath(cache_path)
else:
return self._output_dir.join_within_root(cache_path)

def _get_path_patch_url(self, patch_locator: PatchLocator, patch_path: Path) -> str:
    """Build the VCS URL (repo qualifier plus '#subpath') for a patch file in the project.

    Raises UnsupportedFeature when the patch locator has no workspace locator,
    since the patch path is resolved relative to that workspace.
    """
    workspace_locator = patch_locator.locator
    if workspace_locator is None:
        raise UnsupportedFeature(
            f"{patch_locator} is missing an associated workspace locator "
            "and Cachi2 expects all non-builtin yarn patches to have one"
        )

    source_dir = self._project.source_dir
    # Resolve the patch below the workspace directory, staying inside the project root.
    patch_file = source_dir.join_within_root(workspace_locator.relpath, patch_path)
    vcs_url = get_repo_id(source_dir.root).as_vcs_url_qualifier()
    subpath = str(patch_file.subpath_from_root)

    return f"{vcs_url}#{subpath}"

def _get_builtin_patch_url(self, patch: str, yarn_version: Version) -> str:
    """Return a PURL-style VCS URL qualifier with subpath for a builtin Patch.

    :param patch: builtin patch reference, expected to be exactly ``builtin<compat/NAME>``
    :param yarn_version: Yarn version used to build the ``@yarnpkg/cli/<version>`` git tag
    :return: ``git+<yarn repo>@<url-quoted tag>#<patches dir>/NAME.patch.ts``
    :raises UnsupportedFeature: if the reference is not a plugin-compat builtin patch
    """
    # fullmatch: the whole reference must have the builtin<compat/...> shape,
    # so trailing text after the closing '>' is rejected instead of ignored.
    match = COMPAT_PATCHES_REGEX.fullmatch(patch)
    if match is None:
        raise UnsupportedFeature(f"{patch} is not a builtin patch from plugin-compat")

    patch_filename = f"{match.group(1)}.patch.ts"
    # The subpath ends up in a URL fragment, so always render it with forward slashes.
    patch_subpath = Path(COMPAT_PATCHES_SUBPATH, patch_filename).as_posix()
    # The tag contains '@', which has to be percent-encoded inside the URL.
    yarn_git_tag = quote(f"@yarnpkg/cli/{yarn_version}")

    return f"git+{YARN_REPO_URL}@{yarn_git_tag}#{patch_subpath}"
64 changes: 52 additions & 12 deletions tests/integration/test_data/yarn_e2e_test/bom.json
Original file line number Diff line number Diff line change
Expand Up @@ -314,18 +314,16 @@
},
{
"name": "cachito-npm-without-deps",
"properties": [
{
"name": "cachi2:found_by",
"value": "cachi2"
}
],
"purl": "pkg:npm/cachito-npm-without-deps@1.0.0",
"type": "library",
"version": "1.0.0"
},
{
"name": "cachito-npm-without-deps",
"pedigree": {
"patches": [
{
"diff": {
"url": "git+https://github.com/cachito-testing/cachi2-yarn-berry.git@70515793108df42547d3320c7ea4cd6b6e505c46#.yarn/patches/[email protected]"
},
"type": "unofficial"
}
]
},
"properties": [
{
"name": "cachi2:found_by",
Expand Down Expand Up @@ -662,6 +660,22 @@
},
{
"name": "fsevents",
"pedigree": {
"patches": [
{
"diff": {
"url": "git+https://github.com/cachito-testing/cachi2-yarn-berry.git@70515793108df42547d3320c7ea4cd6b6e505c46#my-patches/fsevents.patch"
},
"type": "unofficial"
},
{
"diff": {
"url": "git+https://github.com/yarnpkg/berry@%40yarnpkg/cli/3.6.1#packages/plugin-compat/sources/patches/fsevents.patch.ts"
},
"type": "unofficial"
}
]
},
"properties": [
{
"name": "cachi2:found_by",
Expand Down Expand Up @@ -961,6 +975,22 @@
},
{
"name": "left-pad",
"pedigree": {
"patches": [
{
"diff": {
"url": "git+https://github.com/cachito-testing/cachi2-yarn-berry.git@70515793108df42547d3320c7ea4cd6b6e505c46#my-patches/left-pad.patch"
},
"type": "unofficial"
},
{
"diff": {
"url": "git+https://github.com/cachito-testing/cachi2-yarn-berry.git@70515793108df42547d3320c7ea4cd6b6e505c46#my-patches/left-pad-2.patch"
},
"type": "unofficial"
}
]
},
"properties": [
{
"name": "cachi2:found_by",
Expand Down Expand Up @@ -1691,6 +1721,16 @@
},
{
"name": "typescript",
"pedigree": {
"patches": [
{
"diff": {
"url": "git+https://github.com/yarnpkg/berry@%40yarnpkg/cli/3.6.1#packages/plugin-compat/sources/patches/typescript.patch.ts"
},
"type": "unofficial"
}
]
},
"properties": [
{
"name": "cachi2:found_by",
Expand Down
Loading
Loading