Skip to content

Commit

Permalink
Add a function to read the data of a single file in the tarball.
Browse files Browse the repository at this point in the history
  • Loading branch information
fredrikekre committed Jan 29, 2021
1 parent 37766a2 commit 893c1b9
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 1 deletion.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Tar"
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
authors = ["Stefan Karpinski <[email protected]>"]
version = "1.9.0"
version = "1.10.0"

[deps]
ArgTools = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
Expand Down
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,19 @@ will also not be copied and will instead be skipped. By default, `extract` will
detect whether symlinks can be created in `dir` or not and will automatically
copy symlinks if they cannot be created.

### Tar.extract_file

```jl
extract_file(tarball, filepath) -> Vector{UInt8}
```
* `tarball :: Union{AbstractString, AbstractCmd, IO}`
* `filepath :: AbstractString`

Read the content of a single file inside the tarball archive.
Return a `Vector{UInt8}` with the data, or `nothing` if no
matching file was found. `filepath` should be specified as a path
relative to the tarball root.

### Tar.list

```jl
Expand Down
33 changes: 33 additions & 0 deletions src/extract.jl
Original file line number Diff line number Diff line change
Expand Up @@ -590,3 +590,36 @@ function read_data(
r < n && error("premature end of tar file")
return view(buf, 1:size)
end


"""
    extract_file(tarball, file) -> Union{Vector{UInt8}, Nothing}

* `tarball :: Union{AbstractString, AbstractCmd, IO}`
* `file    :: AbstractString`

Extract the content of a single file from the tarball. Return a `Vector{UInt8}`
with the data, or `nothing` if no matching file was found.

`file` should be specified as a path relative to the tarball root. Paths are
compared component-wise after splitting on `/`; empty components and `.`
components are ignored, so `"dir/file"` and `"./dir/file"` name the same entry.
Only entries whose header type is `:file` are considered.

Throw an `ArgumentError` if more than one entry in the tarball matches `file`.
"""
function extract_file(tarball::ArgRead, file::AbstractString)::Union{Nothing,Vector{UInt8}}
    # Split a path on '/' and drop empty and "." components.
    # TODO: Correct to filter out "."?
    components(path) = filter!(x -> x != ".", split(path, '/'; keepempty=false))
    return arg_read(tarball) do tar
        wanted = components(file)
        # Test the entry type first so that non-file entries are rejected
        # without splitting (and allocating for) their path.
        predicate = hdr -> hdr.type == :file && components(hdr.path) == wanted
        buf = IOBuffer()
        # A `Ref` instead of a plain `Bool`: the flag is written from inside the
        # closure below, and a reassigned captured local would be boxed.
        found = Ref(false)
        Tar.read_tarball(predicate, tar) do hdr, _
            found[] && throw(ArgumentError(
                "multiple files in the tarball matches the filename $file"))
            found[] = true
            Tar.read_data(tar, buf, size=hdr.size)
        end
        # TODO: Better to throw instead?
        return found[] ? take!(buf) : nothing
    end
end
25 changes: 25 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,31 @@ end
end
end

@testset "API: extract_file" begin
    mktempdir() do dir
        # Fixture: one file at the root and one nested inside a subdirectory.
        write(joinpath(dir, "file.txt"), "file at the root")
        subdir = mkdir(joinpath(dir, "directory"))
        write(joinpath(subdir, "file2.txt"), "file in directory")
        tarball = Tar.create(dir)
        try
            # Exercise both input forms: a path and an open IO stream. The IO
            # form uses `open(f, path)` so every stream is closed after its
            # call instead of being left open.
            extractors = (
                file -> Tar.extract_file(tarball, file),
                file -> open(io -> Tar.extract_file(io, file), tarball),
            )
            for extract in extractors
                @test String(extract("file.txt")) == "file at the root"
                @test String(extract("./file.txt")) == "file at the root"
                @test String(extract("directory/file2.txt")) == "file in directory"
                @test String(extract("./directory/file2.txt")) == "file in directory"
                @test extract("non-existent") === nothing
            end
        finally
            # Remove the tarball written by `Tar.create`; it is not otherwise
            # cleaned up by this testset.
            rm(tarball; force=true)
        end
    end
end

@testset "API: rewrite" begin
# reference standard tarball
reference, hash₁ = make_test_tarball()
Expand Down

0 comments on commit 893c1b9

Please sign in to comment.