From c2fd9295fd4c8805db7d38cf6a4aa895e6fea43b Mon Sep 17 00:00:00 2001 From: Javier Maestro Date: Fri, 22 Nov 2024 23:31:46 +0000 Subject: [PATCH] fix: NVIDIA CUDA flat repos don't follow Debian repo spec Although the Debian repo spec for 'Filename' (see https://wiki.debian.org/DebianRepository/Format#Filename) clearly says that 'Filename' should be relative to the base directory of the repo and should be in canonical form (i.e. without '.' or '..') there are cases where this is not honored. In those cases we try to work around this by assuming 'Filename' is relative to the sources.list directory/ so we combine them and normalize the new 'Filename' path. Note that, so far, only the NVIDIA CUDA repos needed this workaround so maybe this heuristic will break for other repos that don't conform to the Debian repo spec. --- MODULE.bazel | 7 ++- apt/private/BUILD.bazel | 1 + apt/private/apt_deb_repository.bzl | 57 ++++++++++++++++++- apt/private/pkg.bzl | 2 +- apt/private/util.bzl | 17 ++++++ apt/tests/BUILD.bazel | 3 + apt/tests/apt_deb_repository_test.bzl | 5 ++ apt/tests/mocks.bzl | 5 +- apt/tests/pkg_test.bzl | 3 +- apt/tests/util_test.bzl | 25 ++++++++ examples/debian_flat_repo/BUILD.bazel | 52 +++++++++++++---- .../nvidia_ubuntu2404_cuda.lock.json | 23 ++++++++ .../nvidia_ubuntu2404_cuda.yaml | 23 ++++++++ .../debian_flat_repo/test_linux_amd64.yaml | 1 + .../debian_flat_repo/test_linux_arm64.yaml | 9 +++ 15 files changed, 212 insertions(+), 21 deletions(-) create mode 100644 apt/tests/util_test.bzl create mode 100644 examples/debian_flat_repo/nvidia_ubuntu2404_cuda.lock.json create mode 100644 examples/debian_flat_repo/nvidia_ubuntu2404_cuda.yaml create mode 100644 examples/debian_flat_repo/test_linux_arm64.yaml diff --git a/MODULE.bazel b/MODULE.bazel index 23dd29d..010c0c6 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -90,4 +90,9 @@ apt.install( lock = "//examples/ubuntu_snapshot:noble.lock.json", manifest = "//examples/ubuntu_snapshot:noble.yaml", ) -use_repo(apt, "apt_security", "apt_security_resolve", "bullseye", "bullseye_nolock", "bullseye_rproject", "noble", "shared_dependencies") +apt.install( + name = "nvidia_ubuntu2404_cuda", + lock = "//examples/debian_flat_repo:nvidia_ubuntu2404_cuda.lock.json", + manifest = "//examples/debian_flat_repo:nvidia_ubuntu2404_cuda.yaml", +) +use_repo(apt, "apt_security", "apt_security_resolve", "bullseye", "bullseye_nolock", "bullseye_rproject", "noble", "nvidia_ubuntu2404_cuda", "shared_dependencies") diff --git a/apt/private/BUILD.bazel b/apt/private/BUILD.bazel index 5897e4b..41c84c7 100644 --- a/apt/private/BUILD.bazel +++ b/apt/private/BUILD.bazel @@ -68,6 +68,7 @@ bzl_library( deps = [ ":nested_dict", ":version_constraint", + "@bazel_skylib//lib:paths", ], ) diff --git a/apt/private/apt_deb_repository.bzl b/apt/private/apt_deb_repository.bzl index c44950a..9d45a15 100644 --- a/apt/private/apt_deb_repository.bzl +++ b/apt/private/apt_deb_repository.bzl @@ -1,6 +1,8 @@ "https://wiki.debian.org/DebianRepository" +load("@bazel_skylib//lib:paths.bzl", "paths") load(":nested_dict.bzl", "nested_dict") +load(":util.bzl", "util") load(":version_constraint.bzl", "version_constraint") def _fetch_package_index(rctx, source): @@ -62,9 +64,46 @@ def _fetch_package_index(rctx, source): return rctx.read(source.output) +def _make_file_url(pkg, source): + filename = pkg["Filename"] + + invalid_filename = not paths.is_normalized( + filename, + look_for_same_level_references = True, + ) + + if invalid_filename: + # NOTE: + # Although the Debian repo spec for 'Filename' (see + # https://wiki.debian.org/DebianRepository/Format#Filename) clearly + # says that 'Filename' should be relative to the base directory of the + # repo and should be in canonical form (i.e. without '.' or '..') there + # are cases where this is not honored. + # + # In those cases we try to work around this by assuming 'Filename' is + # relative to the sources.list "index path" (e.g. directory/ for flat + # repos) so we combine them and normalize the new 'Filename' path. + # + # Note that, so far, only the NVIDIA CUDA repos needed this workaround + # so maybe this heuristic will break for other repos that don't conform + # to the Debian repo spec. + filename = paths.normalize(paths.join(source.index_path, filename)) + + base_url = util.parse_url(source.base_url) + file_url = "{}://{}{}".format( + base_url.scheme, + base_url.host, + paths.join(base_url.path, filename), + ) + + return file_url, invalid_filename + def _parse_package_index(state, contents, source): last_key = "" pkg = {} + total_pkgs = 0 + out_of_spec = [] + for group in contents.split("\n\n"): for line in group.split("\n"): if line.strip() == "": @@ -91,10 +130,17 @@ def _parse_package_index(state, contents, source): pkg[key] = value if len(pkg.keys()) != 0: - pkg["Root"] = source.base_url + pkg["File-Url"], invalid_filename = _make_file_url(pkg, source) + + if invalid_filename: + out_of_spec.append(pkg["Package"]) + _add_package(state, pkg) last_key = "" pkg = {} + total_pkgs += 1 + + return out_of_spec, total_pkgs def _add_package(state, package): state.packages.set( @@ -124,7 +170,13 @@ def _new(rctx, manifest): output = _fetch_package_index(rctx, source) rctx.report_progress("Parsing package index: %s" % index) - _parse_package_index(state, output, source) + out_of_spec, total_pkgs = _parse_package_index(state, output, source) + + if out_of_spec: + count = len(out_of_spec) + pct = int(100.0 * count / total_pkgs) + msg = "Warning: index {} has {} packages ({}%) with invalid 'Filename' fields" + print(msg.format(index, count, pct)) return struct( package_versions = lambda arch, name: state.packages.get((arch, name), {}).keys(), @@ -136,6 +188,7 @@ deb_repository = struct( new = _new, __test__ = struct( _fetch_package_index = _fetch_package_index, + _make_file_url = _make_file_url, _parse_package_index = _parse_package_index, ), ) diff --git a/apt/private/pkg.bzl b/apt/private/pkg.bzl index 8557a99..ff9fbb0 100644 --- a/apt/private/pkg.bzl +++ b/apt/private/pkg.bzl @@ -4,7 +4,7 @@ def _pkg_from_index(package, arch): return struct( name = package["Package"], version = package["Version"], - url = "%s/%s" % (package["Root"], package["Filename"]), + url = package["File-Url"], sha256 = package["SHA256"], arch = arch, dependencies = [], diff --git a/apt/private/util.bzl b/apt/private/util.bzl index d28351d..fd3913d 100644 --- a/apt/private/util.bzl +++ b/apt/private/util.bzl @@ -11,6 +11,22 @@ def _get_dupes(list_): return dupes +def _parse_url(url): + if "://" not in url: + fail("Invalid URL: %s" % url) + + scheme, url_ = url.split("://", 1) + + path = "/" + + if "/" in url_: + host, path_ = url_.split("/", 1) + path += path_ + else: + host = url_ + + return struct(scheme = scheme, host = host, path = path) + def _sanitize(str): return str.replace("+", "-p-").replace(":", "-").replace("~", "_") @@ -22,6 +38,7 @@ def _warning(rctx, message): util = struct( get_dupes = _get_dupes, + parse_url = _parse_url, sanitize = _sanitize, warning = _warning, ) diff --git a/apt/tests/BUILD.bazel b/apt/tests/BUILD.bazel index 36e2a86..c870f65 100644 --- a/apt/tests/BUILD.bazel +++ b/apt/tests/BUILD.bazel @@ -4,6 +4,7 @@ load(":lockfile_test.bzl", "lockfile_tests") load(":manifest_test.bzl", "manifest_tests") load(":nested_dict_test.bzl", "nested_dict_tests") load(":pkg_test.bzl", "pkg_tests") +load(":util_test.bzl", "util_tests") load(":version_constraint_test.bzl", "version_constraint_tests") load(":version_test.bzl", "version_tests") @@ -19,6 +20,8 @@ nested_dict_tests() pkg_tests() +util_tests() + version_tests() version_constraint_tests() diff --git a/apt/tests/apt_deb_repository_test.bzl b/apt/tests/apt_deb_repository_test.bzl index bc53a74..d3abf09 100644 --- a/apt/tests/apt_deb_repository_test.bzl +++ b/apt/tests/apt_deb_repository_test.bzl @@ -27,6 +27,11 @@ def new_setup(pkgs = None): manifest = mock.manifest(pkgs_names.keys(), arch = arch) source = manifest.sources[0] + for idx in range(len(pkgs)): + pkg = pkgs[idx] + file_url, _ = deb_repository.__test__._make_file_url(pkg, source) + pkg["File-Url"] = file_url + packages_index_content = mock.packages_index_content(*pkgs) mock_rctx = mock.rctx( diff --git a/apt/tests/mocks.bzl b/apt/tests/mocks.bzl index 87944ac..d4895f9 100644 --- a/apt/tests/mocks.bzl +++ b/apt/tests/mocks.bzl @@ -73,7 +73,7 @@ _PKG_INDEX = { "Package": _PKG_LOCK_V2["name"], "Architecture": _PKG_LOCK_V2["arch"], "Version": _PKG_LOCK_V2["version"], - "Root": "/".join(_PKG_LOCK_V2["url"].split("/")[:-1]), + "File-Url": _PKG_LOCK_V2["url"], "Filename": _PKG_LOCK_V2["url"].split("/")[-1], "SHA256": _PKG_LOCK_V2["sha256"], } @@ -82,7 +82,7 @@ _PKG_INDEX_DEPS = [ { "Package": d["name"], "Version": d["version"], - "Root": "http://nowhere", + "File-Url": "http://nowhere/foo.deb", "Filename": "foo.deb", "SHA256": "deadbeef" * 8, } @@ -189,7 +189,6 @@ def _rctx(**kwargs): def _pkg(package, **kwargs): defaults = { "Package": package, - "Root": _URL, "Filename": "/foo/bar/pkg.deb", "SHA256": "deadbeef" * 8, } diff --git a/apt/tests/pkg_test.bzl b/apt/tests/pkg_test.bzl index 10cb657..4a65107 100644 --- a/apt/tests/pkg_test.bzl +++ b/apt/tests/pkg_test.bzl @@ -32,8 +32,7 @@ def _from_index_test(ctx): asserts.equals(env, arch, p.arch) asserts.equals(env, mock_value.PKG_INDEX["Package"], p.name) - asserts.true(env, p.url.startswith(mock_value.PKG_INDEX["Root"])) - asserts.true(env, p.url.endswith(mock_value.PKG_INDEX["Filename"])) + asserts.true(env, p.url, mock_value.PKG_INDEX["File-Url"]) return unittest.end(env) diff --git a/apt/tests/util_test.bzl b/apt/tests/util_test.bzl new file mode 100644 index 0000000..57e2dd8 --- /dev/null +++ b/apt/tests/util_test.bzl @@ -0,0 +1,25 @@ +"unit tests for util" + +load("@bazel_skylib//lib:unittest.bzl", "asserts", "unittest") +load("//apt/private:util.bzl", "util") + +_TEST_SUITE_PREFIX = "util/" + +def _parse_url_test(ctx): + env = unittest.begin(ctx) + + parameters = { + "https://mirror.com": struct(scheme = "https", host = "mirror.com", path = "/"), + "http://mirror.com/foo/bar": struct(scheme = "http", host = "mirror.com", path = "/foo/bar"), + } + + for url, expected in parameters.items(): + actual = util.parse_url(url) + asserts.equals(env, expected, actual) + + return unittest.end(env) + +parse_url_test = unittest.make(_parse_url_test) + +def util_tests(): + parse_url_test(name = _TEST_SUITE_PREFIX + "parse_url") diff --git a/examples/debian_flat_repo/BUILD.bazel b/examples/debian_flat_repo/BUILD.bazel index d491029..f536061 100644 --- a/examples/debian_flat_repo/BUILD.bazel +++ b/examples/debian_flat_repo/BUILD.bazel @@ -5,28 +5,51 @@ load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load") PACKAGES = [ "@bullseye//dpkg", "@bullseye//apt", +] + +PACKAGES_AMD64 = PACKAGES + [ "@bullseye_rproject//r-mathlib", + "@nvidia_ubuntu2404_cuda//nvidia-container-toolkit-base", +] + +PACKAGES_ARM64 = PACKAGES + [ + "@nvidia_ubuntu2404_cuda//nvidia-container-toolkit-base", ] # Creates /var/lib/dpkg/status with installed package information. dpkg_status( name = "dpkg_status", - controls = [ - "%s/amd64:control" % package - for package in PACKAGES - ], + controls = select({ + "@platforms//cpu:x86_64": [ + "%s/amd64:control" % package + for package in PACKAGES_AMD64 + ], + "@platforms//cpu:arm64": [ + "%s/arm64:control" % package + for package in PACKAGES_ARM64 + ], + }), ) oci_image( name = "apt", - architecture = "amd64", + architecture = select({ + "@platforms//cpu:x86_64": "amd64", + "@platforms//cpu:arm64": "arm64", + }), os = "linux", tars = [ ":dpkg_status", - ] + [ - "%s/amd64" % package - for package in PACKAGES - ], + ] + select({ + "@platforms//cpu:x86_64": [ + "%s/amd64" % package + for package in PACKAGES_AMD64 + ], + "@platforms//cpu:arm64": [ + "%s/arm64" % package + for package in PACKAGES_ARM64 + ], + }), ) oci_load( @@ -39,10 +62,15 @@ oci_load( container_structure_test( name = "test", - configs = ["test_linux_amd64.yaml"], + configs = select({ + "@platforms//cpu:x86_64": ["test_linux_amd64.yaml"], + "@platforms//cpu:arm64": ["test_linux_arm64.yaml"], + }), image = ":apt", - target_compatible_with = [ - "@platforms//cpu:x86_64", + target_compatible_with = select({ + "@platforms//cpu:x86_64": ["@platforms//cpu:x86_64"], + "@platforms//cpu:arm64": ["@platforms//cpu:arm64"], + }) + [ "@platforms//os:linux", ], ) diff --git a/examples/debian_flat_repo/nvidia_ubuntu2404_cuda.lock.json b/examples/debian_flat_repo/nvidia_ubuntu2404_cuda.lock.json new file mode 100644 index 0000000..96f06b2 --- /dev/null +++ b/examples/debian_flat_repo/nvidia_ubuntu2404_cuda.lock.json @@ -0,0 +1,23 @@ +{ + "packages": { + "nvidia-container-toolkit-base": { + "amd64": { + "arch": "amd64", + "dependencies": [], + "name": "nvidia-container-toolkit-base", + "sha256": "a28af3f9a0e88ae04a9f346375c85f4609c8320dbaf1136af7522b54a24f53be", + "url": "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/nvidia-container-toolkit-base_1.17.2-1_amd64.deb", + "version": "1.17.2-1" + }, + "arm64": { + "arch": "arm64", + "dependencies": [], + "name": "nvidia-container-toolkit-base", + "sha256": "55f242746c244b2582fc354f57b5cf7fa467b8dec5105fc39f9f46602b8db590", + "url": "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/arm64/nvidia-container-toolkit-base_1.17.2-1_arm64.deb", + "version": "1.17.2-1" + } + } + }, + "version": 2 +} \ No newline at end of file diff --git a/examples/debian_flat_repo/nvidia_ubuntu2404_cuda.yaml b/examples/debian_flat_repo/nvidia_ubuntu2404_cuda.yaml new file mode 100644 index 0000000..9d1fd8c --- /dev/null +++ b/examples/debian_flat_repo/nvidia_ubuntu2404_cuda.yaml @@ -0,0 +1,23 @@ +# Packages for examples/debian_flat_repo. +# +# Anytime this file is changed, the lockfile needs to be regenerated. +# +# To generate the nvidia_cuda.lock.json run the following command +# +# bazel run @nvidia_ubuntu2404_cuda//:lock +# +# See debian_package_index at WORKSPACE.bazel +version: 1 + +sources: + - channel: ubuntu2404/x86_64/ + url: https://developer.download.nvidia.com/compute/cuda/repos + - channel: ubuntu2404/arm64/ + url: https://developer.download.nvidia.com/compute/cuda/repos + +archs: + - amd64 + - arm64 + +packages: + - nvidia-container-toolkit-base diff --git a/examples/debian_flat_repo/test_linux_amd64.yaml b/examples/debian_flat_repo/test_linux_amd64.yaml index 027c4bc..3e0f05e 100644 --- a/examples/debian_flat_repo/test_linux_amd64.yaml +++ b/examples/debian_flat_repo/test_linux_amd64.yaml @@ -7,3 +7,4 @@ commandTests: expectedOutput: - Listing\.\.\. - r-mathlib/now 4.4.2-1~bullseyecran.0 amd64 \[installed,local\] + - nvidia-container-toolkit-base/now 1.17.2-1 amd64 \[installed,local\] diff --git a/examples/debian_flat_repo/test_linux_arm64.yaml b/examples/debian_flat_repo/test_linux_arm64.yaml new file mode 100644 index 0000000..03dec48 --- /dev/null +++ b/examples/debian_flat_repo/test_linux_arm64.yaml @@ -0,0 +1,9 @@ +schemaVersion: "2.0.0" + +commandTests: + - name: "apt list --installed" + command: "apt" + args: ["list", "--installed"] + expectedOutput: + - Listing\.\.\. + - nvidia-container-toolkit-base/now 1.17.2-1 arm64 \[installed,local\]