From 9b0ed165b9f768d0abbf40f70bc0174edffb678f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20=C5=A0oltis?= Date: Wed, 22 Jan 2025 17:20:05 +0100 Subject: [PATCH] yarn v1: Improve offline mirror collision detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the previous duplicate tarball check with a more accurate collision detection mechanism. The new implementation verifies that all packages mapped to the same tarball name in the offline mirror are actually identical. The previous Counter-based approach only checked for duplicate filenames without validating package equivalence. The new defaultdict-based solution properly detects cases where different packages would overwrite each other's tarballs in the mirror. Update test cases to cover valid duplicate scenarios (identical packages) and invalid collisions (different packages with same tarball name). Adjust error messages to better reflect the nature of detected issues. Signed-off-by: Michal Ĺ oltis --- .../package_managers/yarn_classic/main.py | 28 +++-- tests/integration/test_yarn_classic.py | 2 +- .../yarn_classic/test_main.py | 116 ++++++++++++------ 3 files changed, 99 insertions(+), 47 deletions(-) diff --git a/cachi2/core/package_managers/yarn_classic/main.py b/cachi2/core/package_managers/yarn_classic/main.py index 4b6c160f4..fa271ae7b 100644 --- a/cachi2/core/package_managers/yarn_classic/main.py +++ b/cachi2/core/package_managers/yarn_classic/main.py @@ -1,5 +1,5 @@ import logging -from collections import Counter +from collections import defaultdict from pathlib import Path from typing import Any, Iterable @@ -202,23 +202,33 @@ def _verify_corepack_yarn_version(source_dir: RootedPath, env: dict[str, str]) - def _verify_no_offline_mirror_collisions(packages: Iterable[YarnClassicPackage]) -> None: - """Verify that there are no duplicate tarballs in the offline mirror.""" - tarballs = [] + """ + Verify that there are no duplicate tarballs in the offline mirror. + + This is a safety check to ensure that the offline mirror is not corrupted. + The only exception is when all the packages are the same. It may happen when + yarn.lock contains multiple references to the same package, as it is with npm aliases. + """ + tarball_collisions = defaultdict(list) + for p in packages: if isinstance(p, (RegistryPackage, UrlPackage)): tarball_name = get_tarball_mirror_name(p.url) - tarballs.append(tarball_name) elif isinstance(p, GitPackage): tarball_name = get_git_tarball_mirror_name(p.url) - tarballs.append(tarball_name) else: # file, link, and workspace packages are not copied to the offline mirror continue - c = Counter(tarballs) - duplicate_tarballs = [f"{name} ({count}x)" for name, count in c.most_common() if count > 1] - if len(duplicate_tarballs) > 0: - raise PackageManagerError(f"Duplicate tarballs detected: {', '.join(duplicate_tarballs)}") + tarball_collisions[tarball_name].append(p) + + for tarball_name, packages in tarball_collisions.items(): + if all(pkg == packages[0] for pkg in packages): + continue + + raise PackageManagerError( + f"Tarball collision in the offline mirror: {tarball_name} ({len(packages)}x)" + ) # References diff --git a/tests/integration/test_yarn_classic.py b/tests/integration/test_yarn_classic.py index f3b42124f..b31508ea2 100644 --- a/tests/integration/test_yarn_classic.py +++ b/tests/integration/test_yarn_classic.py @@ -85,7 +85,7 @@ check_deps_checksums=False, check_vendor_checksums=False, expected_exit_code=1, - expected_output="Duplicate tarballs detected", + expected_output="Tarball collision in the offline mirror", ), id="yarn_classic_offline_mirror_collision", ), diff --git a/tests/unit/package_managers/yarn_classic/test_main.py b/tests/unit/package_managers/yarn_classic/test_main.py index cefdc0799..bfb430964 100644 --- a/tests/unit/package_managers/yarn_classic/test_main.py +++ b/tests/unit/package_managers/yarn_classic/test_main.py @@ -22,8 +22,10 @@ ) from cachi2.core.package_managers.yarn_classic.project import Project from cachi2.core.package_managers.yarn_classic.resolver import ( - GitPackage, + FilePackage, + LinkPackage, RegistryPackage, + UrlPackage, YarnClassicPackage, ) from cachi2.core.rooted_path import RootedPath @@ -247,47 +249,87 @@ def test_verify_corepack_yarn_version_invalid_version( _verify_corepack_yarn_version(RootedPath(tmp_path), env={"foo": "bar"}) -def test_verify_offline_mirror_collisions_registry_packages() -> None: - packages: Iterable[YarnClassicPackage] = [ - RegistryPackage( - name="foo", - version="1.0.0", - url="https://registry.yarnpkg.com/same/-/same-1.0.0.tgz", +@pytest.mark.parametrize( + "packages", + [ + pytest.param( + [ + RegistryPackage( + name="foo", + version="1.0.0", + url="https://registry.yarnpkg.com/same/-/same-1.0.0.tgz", + ), + RegistryPackage( + name="foo", + version="1.0.0", + url="https://registry.yarnpkg.com/same/-/same-1.0.0.tgz", + ), + ], + id="same_registry_packages", ), - RegistryPackage( - name="bar", - version="1.0.0", - url="https://registry.yarnpkg.com/same/-/same-1.0.0.tgz", + pytest.param( + [ + RegistryPackage( + name="foo", + version="1.0.0", + url="https://registry.yarnpkg.com/@colors/colors/-/colors-1.6.0.tgz", + ), + RegistryPackage( + name="foo", + version="1.0.0", + url="https://registry.yarnpkg.com/@colors/colors/-/colors-1.6.0.tgz", + ), + ], + id="same_scoped_registry_packages", ), - ] - - with pytest.raises(PackageManagerError): - _verify_no_offline_mirror_collisions(packages) + pytest.param( + [ + LinkPackage(name="foo", version="1.0.0", path=RootedPath("/path/to/foo")), + FilePackage(name="bar", version="1.0.0", path=RootedPath("/path/to/bar")), + ], + id="skipped_packages", + ), + ], +) +def test_verify_offline_mirror_collisions_pass(packages: Iterable[YarnClassicPackage]) -> None: + _verify_no_offline_mirror_collisions(packages) -def test_verify_offline_mirror_collisions_scoped_registry_packages() -> None: - packages: Iterable[YarnClassicPackage] = [ - RegistryPackage( - name="foo", - version="1.0.0", - url="https://registry.yarnpkg.com/@colors/colors/-/colors-1.6.0.tgz", +@pytest.mark.parametrize( + "packages", + [ + pytest.param( + [ + RegistryPackage( + name="foo", + version="1.0.0", + url="https://registry.yarnpkg.com/same/-/same-1.0.0.tgz", + ), + UrlPackage( + name="bar", + version="1.0.0", + url="https://mirror.example.com/same-1.0.0.tgz", + ), + ], + id="registry_and_url_package_conflict", ), - RegistryPackage( - name="bar", - version="1.0.0", - url="https://registry.yarnpkg.com/@colors/colors/-/colors-1.6.0.tgz", + pytest.param( + [ + UrlPackage( + name="foo", + version="1.0.0", + url="https://mirror.example.com/same-1.0.0.tgz", + ), + UrlPackage( + name="bar", + version="1.0.0", + url="https://mirror.example.com/same-1.0.0.tgz", + ), + ], + id="url_and_url_package_conflict", ), - ] - - with pytest.raises(PackageManagerError): - _verify_no_offline_mirror_collisions(packages) - - -def test_verify_offline_mirror_collisions_git_packages() -> None: - packages: Iterable[YarnClassicPackage] = [ - GitPackage(name="foo", version="1.0.0", url="https://github.com/user/repo.git#commit-hash"), - GitPackage(name="bar", version="1.0.0", url="https://github.com/user/repo.git#commit-hash"), - ] - + ], +) +def test_verify_offline_mirror_collisions_fail(packages: Iterable[YarnClassicPackage]) -> None: with pytest.raises(PackageManagerError): _verify_no_offline_mirror_collisions(packages)