Skip to content

Commit

Permalink
feat: update flatten to deduplicate files
Browse files Browse the repository at this point in the history
As currently implemented, this option does not deduplicate files, which means the duplicate file path errors can still occur in some cases.

This change updates `flatten.sh` to deduplicate files as well, with the final file in the archive being preserved for each path.

It also renames `assert_tar_listing` to `assert_tar_mtree` and creates a new `assert_tar_listing` rule that lists archive contents instead of exporting an mtree.
  • Loading branch information
westrik committed Dec 9, 2024
1 parent 073917d commit 3275814
Show file tree
Hide file tree
Showing 11 changed files with 99 additions and 36 deletions.
28 changes: 16 additions & 12 deletions distroless/private/flatten.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ if [[ "$output" != "-" ]]; then
fi
done


# There not a lot happening here but there is still too many implicit knowledge.
#
# When we run bsdtar, we ask for it to prompt every entry, in the same order we created above, the mtree.
Expand All @@ -29,21 +28,26 @@ if [[ "$output" != "-" ]]; then
# See: https://github.com/libarchive/libarchive/blob/f745a848d7a81758cd9fcd49d7fd45caeebe1c3d/tar/util.c#L240
# See: https://github.com/libarchive/libarchive/blob/f745a848d7a81758cd9fcd49d7fd45caeebe1c3d/tar/util.c#L216
#
# And finally we iterate over all the entries generating 31 bytes of interleaved 'Y' or 'N' date based on if
# we came across the entry before, for directories the first occurrence is kept, and for files copies are
# preserved.
# To match the extraction behavior of tar itself, we want to preserve only the final occurrence of each file
# and directory in the archive. To do this, we iterate over all the entries twice. The first pass computes the
# number of occurrences of each path, and the second pass determines whether each entry is the final (or only)
# occurrence of that path.

$bsdtar --confirmation "$@" > $output 2< <(awk '{
if (substr($0,0,1) == "#") {
next;
}
count[$1]++;
files[NR] = $1
}
END {
ORS=""
keep="n"
if (count[$1] == 1 || $1 !~ "/$") {
keep="y"
for (i=1; i<=NR; i++) {
seen[files[i]]++
keep="n"
if (count[files[i]] == seen[files[i]]) {
keep="y"
}
for (j=0; j<31; j++) print keep
fflush()
}
for (i=0;i<31;i++) print keep
fflush()
}' "$mtree")
rm "$mtree"
else
Expand Down
46 changes: 44 additions & 2 deletions distroless/tests/asserts.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,58 @@
load("@aspect_bazel_lib//lib:diff_test.bzl", "diff_test")
load("@bazel_skylib//rules:write_file.bzl", "write_file")

# buildifier: disable=function-docstring
def assert_tar_mtree(name, actual, expected):
"""
Assert that an mtree representation of a tarball matches an expected value.
Args:
name: name of this assertion
actual: label for a tarball
expected: expected mtree
"""
actual_mtree = "_{}_mtree".format(name)
expected_mtree = "_{}_expected".format(name)

native.genrule(
name = actual_mtree,
srcs = [actual],
outs = ["_{}.mtree".format(name)],
cmd = "cat $(execpath {}) | $(BSDTAR_BIN) -cf $@ --format=mtree --options '!nlink' @-".format(actual),
toolchains = ["@bsd_tar_toolchains//:resolved_toolchain"],
)

write_file(
name = expected_mtree,
out = "_{}.expected".format(name),
content = [expected],
newline = "unix",
)

diff_test(
name = name,
file1 = actual_mtree,
file2 = expected_mtree,
timeout = "short",
)


def assert_tar_listing(name, actual, expected):
"""
Assert that the listed contents of a tarball match an expected value. This is useful when checking for duplicated paths.
Args:
name: name of this assertion
actual: label for a tarball
expected: expected listing
"""
actual_listing = "_{}_listing".format(name)
expected_listing = "_{}_expected".format(name)

native.genrule(
name = actual_listing,
srcs = [actual],
outs = ["_{}.listing".format(name)],
cmd = "cat $(execpath {}) | $(BSDTAR_BIN) -cf $@ --format=mtree --options '!nlink' @-".format(actual),
cmd = "cat $(execpath {}) | $(BSDTAR_BIN) -tf - > $@".format(actual),
toolchains = ["@bsd_tar_toolchains//:resolved_toolchain"],
)

Expand Down
4 changes: 2 additions & 2 deletions examples/cacerts/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
load("//distroless:defs.bzl", "cacerts")
load("//distroless/tests:asserts.bzl", "assert_tar_listing")
load("//distroless/tests:asserts.bzl", "assert_tar_mtree")

cacerts(
name = "cacerts",
package = "@example-bullseye-ca-certificates//:data.tar.xz",
)

assert_tar_listing(
assert_tar_mtree(
name = "test_cacerts",
actual = "cacerts",
expected = """\
Expand Down
25 changes: 21 additions & 4 deletions examples/flatten/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
load("@aspect_bazel_lib//lib:tar.bzl", "tar")
load("//distroless:defs.bzl", "flatten", "home", "passwd")
load("//distroless/tests:asserts.bzl", "assert_tar_listing")
load("//distroless/tests:asserts.bzl", "assert_tar_listing", "assert_tar_mtree")

passwd(
name = "passwd",
Expand Down Expand Up @@ -47,7 +47,7 @@ flatten(
],
)

assert_tar_listing(
assert_tar_mtree(
name = "test_flatten",
actual = "flatten",
expected = """\
Expand Down Expand Up @@ -94,8 +94,8 @@ flatten(
],
)

assert_tar_listing(
name = "test_flatten_dedup",
assert_tar_mtree(
name = "test_flatten_dedup_mtree",
actual = "flatten_dedup",
expected = """\
#mtree
Expand All @@ -107,3 +107,20 @@ assert_tar_listing(
./examples/flatten/dir/sub/content.txt time=1672560000.0 mode=755 gid=0 uid=0 type=file size=0
""",
)

assert_tar_listing(
name = "test_flatten_dedup_listing",
actual = "flatten_dedup",
expected = """\
examples/
examples/flatten/
examples/flatten/dir/
examples/flatten/dir/changelog
examples/flatten/dir/sub/
examples/flatten/dir/sub/content.txt
examples/flatten/dir/changelog
examples/flatten/dir/changelog
examples/flatten/dir/sub/content.txt
""",
)

4 changes: 2 additions & 2 deletions examples/group/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
load("@aspect_bazel_lib//lib:diff_test.bzl", "diff_test")
load("//distroless:defs.bzl", "group")
load("//distroless/tests:asserts.bzl", "assert_tar_listing")
load("//distroless/tests:asserts.bzl", "assert_tar_mtree")

group(
name = "group",
Expand Down Expand Up @@ -32,7 +32,7 @@ diff_test(
file2 = "group.expected.txt",
)

assert_tar_listing(
assert_tar_mtree(
name = "test_group",
actual = "group",
expected = """\
Expand Down
4 changes: 2 additions & 2 deletions examples/home/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
load("//distroless:defs.bzl", "home")
load("//distroless/tests:asserts.bzl", "assert_tar_listing")
load("//distroless/tests:asserts.bzl", "assert_tar_mtree")

home(
name = "home",
Expand All @@ -17,7 +17,7 @@ home(
],
)

assert_tar_listing(
assert_tar_mtree(
name = "test_home",
actual = "home",
expected = """\
Expand Down
4 changes: 2 additions & 2 deletions examples/java_keystore/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
load("//distroless:defs.bzl", "java_keystore")
load("//distroless/tests:asserts.bzl", "assert_jks_listing", "assert_tar_listing")
load("//distroless/tests:asserts.bzl", "assert_jks_listing", "assert_tar_mtree")

java_keystore(
name = "java_keystore",
Expand All @@ -23,7 +23,7 @@ assert_jks_listing(
expected = "expected.jks.output",
)

assert_tar_listing(
assert_tar_mtree(
name = "test_java_keystore",
actual = "java_keystore",
expected = """\
Expand Down
6 changes: 3 additions & 3 deletions examples/locale/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
load("//distroless:defs.bzl", "locale")
load("//distroless/tests:asserts.bzl", "assert_tar_listing")
load("//distroless/tests:asserts.bzl", "assert_tar_mtree")

locale(
name = "bullseye",
charset = "C.UTF-8",
package = "@example-bullseye-libc-bin//:data.tar.xz",
)

assert_tar_listing(
assert_tar_mtree(
name = "test_bullseye",
actual = "bullseye",
expected = """\
Expand Down Expand Up @@ -41,7 +41,7 @@ locale(
package = "@example-bookworm-libc-bin//:data.tar.xz",
)

assert_tar_listing(
assert_tar_mtree(
name = "test_bookworm",
actual = "bookworm",
expected = """\
Expand Down
6 changes: 3 additions & 3 deletions examples/os_release/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
load("@aspect_bazel_lib//lib:diff_test.bzl", "diff_test")
load("//distroless:defs.bzl", "os_release")
load("//distroless/tests:asserts.bzl", "assert_tar_listing")
load("//distroless/tests:asserts.bzl", "assert_tar_mtree")

os_release(
name = "os_release",
Expand All @@ -16,7 +16,7 @@ diff_test(
file2 = "content.expected.txt",
)

assert_tar_listing(
assert_tar_mtree(
name = "test_os_release",
actual = "os_release",
expected = """\
Expand All @@ -37,7 +37,7 @@ os_release(
path = "/etc/os-release",
)

assert_tar_listing(
assert_tar_mtree(
name = "test_os_release_alternative_path",
actual = "os_release_alternative_path",
expected = """\
Expand Down
4 changes: 2 additions & 2 deletions examples/passwd/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
load("@aspect_bazel_lib//lib:diff_test.bzl", "diff_test")
load("//distroless:defs.bzl", "passwd")
load("//distroless/tests:asserts.bzl", "assert_tar_listing")
load("//distroless/tests:asserts.bzl", "assert_tar_mtree")

passwd(
name = "passwd",
Expand All @@ -22,7 +22,7 @@ diff_test(
file2 = "passwd.expected.txt",
)

assert_tar_listing(
assert_tar_mtree(
name = "test_passwd",
actual = "passwd",
expected = """\
Expand Down
4 changes: 2 additions & 2 deletions examples/statusd/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# buildifier: disable=bzl-visibility
load("//apt:defs.bzl", "dpkg_statusd")
load("//distroless/tests:asserts.bzl", "assert_tar_listing")
load("//distroless/tests:asserts.bzl", "assert_tar_mtree")

dpkg_statusd(
name = "statusd",
package_name = "ca-certificates",
control = "@example-bullseye-ca-certificates//:control.tar.xz",
)

assert_tar_listing(
assert_tar_mtree(
name = "test_statusd",
actual = "statusd",
expected = """\
Expand Down

0 comments on commit 3275814

Please sign in to comment.