Skip to content

Commit

Permalink
remove gnutar dep
Browse files Browse the repository at this point in the history
  • Loading branch information
thesayyn committed Dec 7, 2024
1 parent bda06c2 commit 297a974
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 35 deletions.
13 changes: 4 additions & 9 deletions distroless/private/flatten.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,12 @@ flatten = rule(
doc = "List of tars to flatten",
),
"deduplicate": attr.bool(doc = """\
EXPERIMENTAL: Remove duplicate entries from the archives after flattening.
This requires `awk`, `sort` and `tar` to be available in the PATH.
EXPERIMENTAL: We may change or remove it without notice.
To support macOS, the presence of `gtar` is checked first, falling back to `tar` if it does not exist,
and it is ensured that the chosen binary supports the `--delete` mode.
Remove duplicate entries from the archives after flattening.
Deduplication is performed only for directories.
On macOS: `brew install gnu-tar` can be run to install gnutar.
See: https://formulae.brew.sh/formula/gnu-tar
NOTE: You may also need to run `sudo ln -s /opt/homebrew/bin/gtar /usr/local/bin/gtar` to make it available to Bazel.
This requires `awk` to be available in the PATH.
""", default = False),
"compress": attr.string(
doc = "Compress the archive file with a supported algorithm.",
Expand Down
52 changes: 26 additions & 26 deletions distroless/private/flatten.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,46 +5,46 @@ bsdtar="$1";
output="$2";
shift 2;

#######################################
# Runs `gtar` when it can be resolved, otherwise `tar`, forwarding every
# argument unchanged.
# Arguments: "$@" - passed verbatim to the selected tar command
# Outputs:   stdout/stderr of the selected tar command; on lookup failure a
#            diagnostic is written to stderr
# Returns:   exit status of the tar command; exits the (sub)shell with 1 when
#            neither `gtar` nor `tar` can be found
#######################################
function run_gtar() {
  local tar_bin
  if command -v gtar >/dev/null 2>&1; then
    tar_bin="gtar"
  elif command -v tar >/dev/null 2>&1; then
    tar_bin="tar"
  else
    # The diagnostic must go to stderr: the caller redirects this function's
    # stdout into the output archive, so a message on stdout would be written
    # into the archive instead of being shown to the user.
    echo "Neither 'tar' nor 'gtar' command is available." >&2
    exit 1
  fi
  "$tar_bin" "$@"
}


# Deduplication requested, use this complex pipeline to deduplicate.
if [[ "$output" != "-" ]]; then

mtree=$(mktemp)
duplicates=$(mktemp)

# List files in all archives and append to single column mtree.
for arg in "$@"; do
if [[ "$arg" == "@"* ]]; then
"$bsdtar" -cf - --format=mtree --options "mtree:!all,mtree:type" "$arg" >> "$mtree"
"$bsdtar" -tf "${arg:1}" >> "$mtree"
fi
done

awk '{

# There is not a lot happening here, but there is still a lot of implicit knowledge.
#
# When we run bsdtar, we ask it to prompt for every entry, in the same order as the mtree we created above.
# See: https://github.com/libarchive/libarchive/blob/f745a848d7a81758cd9fcd49d7fd45caeebe1c3d/tar/write.c#L683
#
# For every prompt, and therefore every entry, we have to write 31 bytes of data, one of which has to be either 'Y' or 'N'.
# The reason is that, since we are not a TTY but are pretending to be one, we can't interleave write calls,
# so we have to interleave the answers by filling up the buffer with 31 bytes of 'Y' or 'N'.
# See: https://github.com/libarchive/libarchive/blob/f745a848d7a81758cd9fcd49d7fd45caeebe1c3d/tar/util.c#L240
# See: https://github.com/libarchive/libarchive/blob/f745a848d7a81758cd9fcd49d7fd45caeebe1c3d/tar/util.c#L216
#
# And finally we iterate over all the entries, generating 31 bytes of interleaved 'Y' or 'N' data based on whether
# we have come across the entry before: for directories the first occurrence is kept, and for files copies are
# preserved.
$bsdtar --confirmation "$@" > $output 2< <(awk '{
if (substr($0,0,1) == "#") {
next;
}
line_count[$1]++;
if (line_count[$1] > 1) {
if ($1 == "/.") {
next
}
print $1
count[$1]++;
ORS=""
keep="n"
if (count[$1] == 1 || $1 !~ "/$") {
keep="y"
}
}' "$mtree" | sort | uniq | sort -r > "$duplicates"

$bsdtar --exclude "^./$" $@ | run_gtar --delete --file - --occurrence=1 --files-from="$duplicates" > "$output"
for (i=0;i<31;i++) print keep
fflush()
}' "$mtree")
rm "$mtree"
else
# No deduplication, business as usual
Expand Down

0 comments on commit 297a974

Please sign in to comment.