Skip to content

Commit

Permalink
parallelize decoding (#127)
Browse files Browse the repository at this point in the history
* parallelize decoding

* use _read!(::IO, ::AbstractArray) for backwards compatibility

---------

Co-authored-by: Tamas Nagy <[email protected]>
  • Loading branch information
chrstphrbrns and tlnagy authored Nov 28, 2023
1 parent 4613107 commit 29c5038
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 18 deletions.
10 changes: 5 additions & 5 deletions src/compression.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ function Base.read!(tfs::TiffFileStrip, arr::AbstractArray{T, N}, ::Val{COMPRESS
nxt = Array{UInt8}(undef, 1)
arr = reinterpret(UInt8, arr)
while pos < length(arr)
read!(tfs.tf, nbit)
read!(tfs.io, nbit)
n = nbit[1]
if 0 <= n <= 127
read!(tfs.tf, view(arr, pos:pos+n))
_read!(tfs.io, view(arr, pos:pos+n))
pos += n
elseif -127 <= n <= -1
read!(tfs.tf, nxt)
read!(tfs.io, nxt)
arr[pos:(pos-n)] .= nxt[1]
pos += -n
end
Expand All @@ -35,11 +35,11 @@ function Base.read!(tfs::TiffFileStrip, arr::AbstractArray{T, N}, ::Val{COMPRESS
end

function Base.read!(tfs::TiffFileStrip, arr::AbstractArray, ::Val{COMPRESSION_DEFLATE})
readbytes!(InflateZlibStream(tfs.tf.io.io), reinterpret(UInt8, vec(arr)))
readbytes!(InflateZlibStream(tfs.io), reinterpret(UInt8, vec(arr)))
end

function Base.read!(tfs::TiffFileStrip, arr::AbstractArray, ::Val{COMPRESSION_ADOBE_DEFLATE})
readbytes!(InflateZlibStream(tfs.tf.io.io), reinterpret(UInt8, vec(arr)))
readbytes!(InflateZlibStream(tfs.io), reinterpret(UInt8, vec(arr)))
end

function lzw_decode!(io, arr::AbstractArray)
Expand Down
46 changes: 33 additions & 13 deletions src/ifds.jl
Original file line number Diff line number Diff line change
Expand Up @@ -212,17 +212,16 @@ pixels in the image
$(FIELDS)
"""
struct TiffFileStrip{O, S, P}
"""The file stream"""
tf::TiffFile{O, S}
struct TiffFileStrip{O, T}
"""Strip data"""
io::IOBuffer

"""The IFD corresponding to this strip"""
ifd::IFD{O}
"""The number of bytes in this strip"""
bytes::Int
end

Base.read!(tfs::TiffFileStrip, arr::AbstractArray) = read!(tfs.tf, arr)
Base.bytesavailable(tfs::TiffFileStrip) = tfs.bytes
Base.read!(tfs::TiffFileStrip, arr::AbstractArray) = _read!(tfs.io, arr)
Base.bytesavailable(tfs::TiffFileStrip) = bytesavailable(tfs.io)

function Base.read!(target::AbstractArray{T, N}, tf::TiffFile{O, S}, ifd::IFD{O}) where {T, N, O, S}
if PLANARCONFIG in ifd
Expand All @@ -237,11 +236,12 @@ function Base.read!(target::AbstractArray{T, N}, tf::TiffFile{O, S}, ifd::IFD{O}
# number of input bytes in each strip or tile
encoded_bytes = istiled(ifd) ? ifd[TILEBYTECOUNTS].data : ifd[STRIPBYTECOUNTS].data

rows = nrows(ifd)
cols = ncols(ifd)

if istiled(ifd)
strip_pixels = map(_ -> tilecols(ifd) * tilerows(ifd), encoded_bytes)
else
rows = nrows(ifd)
cols = ncols(ifd)
nstrips = length(encoded_bytes)
rowsperstrip = getdata(Int, ifd, ROWSPERSTRIP, rows)

Expand All @@ -251,17 +251,37 @@ function Base.read!(target::AbstractArray{T, N}, tf::TiffFile{O, S}, ifd::IFD{O}
@assert sum(strip_pixels) == rows * cols
end

parallel_enabled = something(tryparse(Bool, get(ENV, "JULIA_IMAGES_PARALLEL", "1")), false)
do_parallel = parallel_enabled && rows * cols > 250_000 # pixels

start = 1
comp = Val(compression)
rtype = rawtype(ifd)
tasks::Vector{Task} = []
for (offset, len, bytes) in zip(offsets, strip_pixels, encoded_bytes)
seek(tf, offset)
tfs = TiffFileStrip{O, S, rtype}(tf, ifd, bytes)
arr = view(target, start:(start+len-1))
read!(tfs, arr, comp)
reverse_prediction!(tfs, arr)
data = Vector{UInt8}(undef, bytes)
read!(tf, data)
tfs = TiffFileStrip{O, rtype}(IOBuffer(data), ifd)

function go(tfs, arr, comp)
read!(tfs, arr, comp)
reverse_prediction!(tfs, arr)
end

if do_parallel
push!(tasks, Threads.@spawn go(tfs, arr, comp))
else
go(tfs, arr, comp)
end

start += len
end

for task in tasks
wait(task)
end
else
seek(tf, first(offsets))
read!(tf, target, compression)
Expand Down Expand Up @@ -324,7 +344,7 @@ function Base.write(tf::TiffFile{O}, ifd::IFD{O}) where {O <: Unsigned}
return ifd_end_pos
end

function reverse_prediction!(tfs::TiffFileStrip{O, S, P}, arr::AbstractArray{T, N}) where {O, S, P, T, N}
function reverse_prediction!(tfs::TiffFileStrip{O, P}, arr::AbstractArray{T,N}) where {O, P, T, N}
pred::Int = predictor(tfs.ifd)
spp::Int = nsamples(tfs.ifd)
if pred == 2
Expand Down
3 changes: 3 additions & 0 deletions src/load.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ true, false, and false by default, respectively. Setting `verbose` to false
will hide the loading bar, while setting either `lazyio` or `mmap` to true
defer loading until the data are needed (by either of two mechanisms).
Parallelism is enabled by default, but can be disabled by setting
`JULIA_IMAGES_PARALLEL=false` in your environment.
See [Lazy TIFFs](@ref) for more details about memory-mapping and lazy I/O.
"""
function load(filepath::String; mode = "r", kwargs...)
Expand Down

2 comments on commit 29c5038

@tlnagy
Copy link
Owner

@tlnagy tlnagy commented on 29c5038 Dec 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/96645

Tip: Release Notes

Did you know you can add release notes too? Just add markdown formatted text underneath the comment after the text
"Release notes:" and it will be added to the registry PR, and if TagBot is installed it will also be added to the
release that TagBot creates. i.e.

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.8.0 -m "<description of version>" 29c50381968d66f2c40399257b1d838cf4de111a
git push origin v0.8.0

Please sign in to comment.