Skip to content

Commit

Permalink
yarn v1: Improve offline mirror collision detection
Browse files Browse the repository at this point in the history
Replace the previous duplicate tarball check with a more accurate collision
detection mechanism. The new implementation verifies that all packages mapped
to the same tarball name in the offline mirror are actually identical.

The previous Counter-based approach only checked for duplicate filenames
without validating package equivalence. The new defaultdict-based solution
properly detects cases where different packages would overwrite each other's
tarballs in the mirror.

Update test cases to cover valid duplicate scenarios (identical packages) and
invalid collisions (different packages with same tarball name). Adjust error
messages to better reflect the nature of detected issues.

Signed-off-by: Michal Šoltis <[email protected]>
  • Loading branch information
slimreaper35 committed Jan 24, 2025
1 parent 3800b60 commit 9b0ed16
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 47 deletions.
28 changes: 19 additions & 9 deletions cachi2/core/package_managers/yarn_classic/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
from collections import Counter
from collections import defaultdict
from pathlib import Path
from typing import Any, Iterable

Expand Down Expand Up @@ -202,23 +202,33 @@ def _verify_corepack_yarn_version(source_dir: RootedPath, env: dict[str, str]) -


def _verify_no_offline_mirror_collisions(packages: Iterable[YarnClassicPackage]) -> None:
"""Verify that there are no duplicate tarballs in the offline mirror."""
tarballs = []
"""
Verify that there are no duplicate tarballs in the offline mirror.
This is a safety check to ensure that the offline mirror is not corrupted.
The only exception is when all the packages are the same. It may happen when
yarn.lock contains multiple references to the same package, as it is with npm aliases.
"""
tarball_collisions = defaultdict(list)

for p in packages:
if isinstance(p, (RegistryPackage, UrlPackage)):
tarball_name = get_tarball_mirror_name(p.url)
tarballs.append(tarball_name)
elif isinstance(p, GitPackage):
tarball_name = get_git_tarball_mirror_name(p.url)
tarballs.append(tarball_name)
else:
# file, link, and workspace packages are not copied to the offline mirror
continue

c = Counter(tarballs)
duplicate_tarballs = [f"{name} ({count}x)" for name, count in c.most_common() if count > 1]
if len(duplicate_tarballs) > 0:
raise PackageManagerError(f"Duplicate tarballs detected: {', '.join(duplicate_tarballs)}")
tarball_collisions[tarball_name].append(p)

for tarball_name, packages in tarball_collisions.items():
if all(pkg == packages[0] for pkg in packages):
continue

raise PackageManagerError(
f"Tarball collision in the offline mirror: {tarball_name} ({len(packages)}x)"
)


# References
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/test_yarn_classic.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
check_deps_checksums=False,
check_vendor_checksums=False,
expected_exit_code=1,
expected_output="Duplicate tarballs detected",
expected_output="Tarball collision in the offline mirror",
),
id="yarn_classic_offline_mirror_collision",
),
Expand Down
116 changes: 79 additions & 37 deletions tests/unit/package_managers/yarn_classic/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@
)
from cachi2.core.package_managers.yarn_classic.project import Project
from cachi2.core.package_managers.yarn_classic.resolver import (
GitPackage,
FilePackage,
LinkPackage,
RegistryPackage,
UrlPackage,
YarnClassicPackage,
)
from cachi2.core.rooted_path import RootedPath
Expand Down Expand Up @@ -247,47 +249,87 @@ def test_verify_corepack_yarn_version_invalid_version(
_verify_corepack_yarn_version(RootedPath(tmp_path), env={"foo": "bar"})


def test_verify_offline_mirror_collisions_registry_packages() -> None:
packages: Iterable[YarnClassicPackage] = [
RegistryPackage(
name="foo",
version="1.0.0",
url="https://registry.yarnpkg.com/same/-/same-1.0.0.tgz",
@pytest.mark.parametrize(
"packages",
[
pytest.param(
[
RegistryPackage(
name="foo",
version="1.0.0",
url="https://registry.yarnpkg.com/same/-/same-1.0.0.tgz",
),
RegistryPackage(
name="foo",
version="1.0.0",
url="https://registry.yarnpkg.com/same/-/same-1.0.0.tgz",
),
],
id="same_registry_packages",
),
RegistryPackage(
name="bar",
version="1.0.0",
url="https://registry.yarnpkg.com/same/-/same-1.0.0.tgz",
pytest.param(
[
RegistryPackage(
name="foo",
version="1.0.0",
url="https://registry.yarnpkg.com/@colors/colors/-/colors-1.6.0.tgz",
),
RegistryPackage(
name="foo",
version="1.0.0",
url="https://registry.yarnpkg.com/@colors/colors/-/colors-1.6.0.tgz",
),
],
id="same_scoped_registry_packages",
),
]

with pytest.raises(PackageManagerError):
_verify_no_offline_mirror_collisions(packages)
pytest.param(
[
LinkPackage(name="foo", version="1.0.0", path=RootedPath("/path/to/foo")),
FilePackage(name="bar", version="1.0.0", path=RootedPath("/path/to/bar")),
],
id="skipped_packages",
),
],
)
def test_verify_offline_mirror_collisions_pass(packages: Iterable[YarnClassicPackage]) -> None:
_verify_no_offline_mirror_collisions(packages)


def test_verify_offline_mirror_collisions_scoped_registry_packages() -> None:
packages: Iterable[YarnClassicPackage] = [
RegistryPackage(
name="foo",
version="1.0.0",
url="https://registry.yarnpkg.com/@colors/colors/-/colors-1.6.0.tgz",
@pytest.mark.parametrize(
"packages",
[
pytest.param(
[
RegistryPackage(
name="foo",
version="1.0.0",
url="https://registry.yarnpkg.com/same/-/same-1.0.0.tgz",
),
UrlPackage(
name="bar",
version="1.0.0",
url="https://mirror.example.com/same-1.0.0.tgz",
),
],
id="registry_and_url_package_conflict",
),
RegistryPackage(
name="bar",
version="1.0.0",
url="https://registry.yarnpkg.com/@colors/colors/-/colors-1.6.0.tgz",
pytest.param(
[
UrlPackage(
name="foo",
version="1.0.0",
url="https://mirror.example.com/same-1.0.0.tgz",
),
UrlPackage(
name="bar",
version="1.0.0",
url="https://mirror.example.com/same-1.0.0.tgz",
),
],
id="url_and_url_package_conflict",
),
]

with pytest.raises(PackageManagerError):
_verify_no_offline_mirror_collisions(packages)


def test_verify_offline_mirror_collisions_git_packages() -> None:
packages: Iterable[YarnClassicPackage] = [
GitPackage(name="foo", version="1.0.0", url="https://github.com/user/repo.git#commit-hash"),
GitPackage(name="bar", version="1.0.0", url="https://github.com/user/repo.git#commit-hash"),
]

],
)
def test_verify_offline_mirror_collisions_fail(packages: Iterable[YarnClassicPackage]) -> None:
with pytest.raises(PackageManagerError):
_verify_no_offline_mirror_collisions(packages)

0 comments on commit 9b0ed16

Please sign in to comment.