From 08d030f0f2c36896bc957edb2929a80dc79d41e3 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Wed, 13 Nov 2024 23:52:05 -0500 Subject: [PATCH] Convert H5Z*.jl packages into package extensions (#1160) * Convert H5Z*.jl packages into package extensions * Fix tests for Julia 1.6, set filter pkgs to 0.2.0 * Fix formatting, docs. Import rather than alias symbols. * Apply formatting * Update docs for extension packages * Apply formatter * Add note in history --- HISTORY.md | 1 + Project.toml | 34 +- docs/Manifest.toml | 17 +- docs/make.jl | 23 +- docs/src/interface/filters.md | 30 ++ ext/BloscExt/BloscExt.jl | 223 ++++++++++ .../H5Zblosc => ext/BloscExt}/LICENSE.txt | 0 ext/CodecBzip2Ext/CodecBzip2Ext.jl | 239 +++++++++++ .../CodecBzip2Ext}/LICENSE.txt | 0 ext/CodecBzip2Ext/README.md | 6 + .../CodecBzip2Ext}/THIRDPARTY.txt | 0 ext/CodecLz4Ext/CodecLz4Ext.jl | 236 +++++++++++ .../H5Zlz4 => ext/CodecLz4Ext}/LICENSE.txt | 0 ext/CodecLz4Ext/README.md | 6 + .../H5Zlz4 => ext/CodecLz4Ext}/THIRDPARTY.txt | 4 +- ext/CodecZstdExt/CodecZstdExt.jl | 128 ++++++ .../H5Zzstd => ext/CodecZstdExt}/LICENSE.txt | 0 ext/CodecZstdExt/Project.toml.old | 12 + ext/CodecZstdExt/README.md | 8 + .../CodecZstdExt}/THIRDPARTY.txt | 0 ext/bitshuffle_jll_ext/LICENSE.txt | 23 ++ ext/bitshuffle_jll_ext/README.md | 6 + ext/bitshuffle_jll_ext/bitshuffle_jll_ext.jl | 388 ++++++++++++++++++ filters/H5Zbitshuffle/Project.toml | 4 +- filters/H5Zbitshuffle/src/H5Zbitshuffle.jl | 372 +---------------- filters/H5Zblosc/Project.toml | 4 +- filters/H5Zblosc/src/H5Zblosc.jl | 207 +--------- filters/H5Zbzip2/Project.toml | 4 +- filters/H5Zbzip2/src/H5Zbzip2.jl | 234 +---------- filters/H5Zlz4/Project.toml | 4 +- filters/H5Zlz4/src/H5Zlz4.jl | 228 +--------- filters/H5Zzstd/Project.toml | 4 +- filters/H5Zzstd/README.md | 9 +- filters/H5Zzstd/src/H5Zzstd.jl | 134 +----- test/runtests.jl | 20 +- 35 files changed, 1456 insertions(+), 1152 deletions(-) create mode 100644 ext/BloscExt/BloscExt.jl rename 
{filters/H5Zblosc => ext/BloscExt}/LICENSE.txt (100%) create mode 100644 ext/CodecBzip2Ext/CodecBzip2Ext.jl rename {filters/H5Zbzip2 => ext/CodecBzip2Ext}/LICENSE.txt (100%) create mode 100644 ext/CodecBzip2Ext/README.md rename {filters/H5Zbzip2 => ext/CodecBzip2Ext}/THIRDPARTY.txt (100%) create mode 100644 ext/CodecLz4Ext/CodecLz4Ext.jl rename {filters/H5Zlz4 => ext/CodecLz4Ext}/LICENSE.txt (100%) create mode 100644 ext/CodecLz4Ext/README.md rename {filters/H5Zlz4 => ext/CodecLz4Ext}/THIRDPARTY.txt (95%) create mode 100644 ext/CodecZstdExt/CodecZstdExt.jl rename {filters/H5Zzstd => ext/CodecZstdExt}/LICENSE.txt (100%) create mode 100644 ext/CodecZstdExt/Project.toml.old create mode 100644 ext/CodecZstdExt/README.md rename {filters/H5Zzstd => ext/CodecZstdExt}/THIRDPARTY.txt (100%) create mode 100644 ext/bitshuffle_jll_ext/LICENSE.txt create mode 100644 ext/bitshuffle_jll_ext/README.md create mode 100644 ext/bitshuffle_jll_ext/bitshuffle_jll_ext.jl diff --git a/HISTORY.md b/HISTORY.md index 8fb043467..40ded242f 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -4,6 +4,7 @@ Please also see the [release notes](https://github.com/JuliaIO/HDF5.jl/releases) ## v0.18.0 (in development) * Refactor Dataspaces (#1104) +* Convert filter packages into package extensions (#1160) ## v0.17.2 * Fix variable length strings as attributes (#1130) diff --git a/Project.toml b/Project.toml index d27dbd6b4..de78dbe7f 100644 --- a/Project.toml +++ b/Project.toml @@ -6,31 +6,42 @@ version = "0.17.1" Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" HDF5_jll = "0234f1f7-429e-5d53-9886-15a909be8d59" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" Mmap = "a63ad114-7e13-5084-954f-fe012c677804" Preferences = "21216c6a-2e73-6563-6e65-726566657250" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Requires = "ae029012-a4dd-5104-9daa-d747884805df" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" 
-MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" - -[weakdeps] -MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" - -[extensions] -MPIExt = "MPI" [compat] +Blosc = "0.7.3" +CodecBzip2 = "0.7, 0.8" +CodecLz4 = "0.4" +CodecZstd = "0.7, 0.8" Compat = "3.1.0, 4" HDF5_jll = "~1.10.5, ~1.12.0, ~1.14.0" MPI = "0.20" MPIPreferences = "0.1.7" Preferences = "1.3" Requires = "1.0" +bitshuffle_jll = "0.4.2, 0.5" julia = "1.6" +[extensions] +BloscExt = "Blosc" +CodecBzip2Ext = "CodecBzip2" +CodecLz4Ext = "CodecLz4" +CodecZstdExt = "CodecZstd" +MPIExt = "MPI" +bitshuffle_jll_ext = "bitshuffle_jll" + [extras] +Blosc = "a74b3585-a348-5f62-a45c-50e91977d574" CRC32c = "8bf52ea8-c179-5cab-976a-9e18b702a9bc" +CodecBzip2 = "523fee87-0ab8-5b00-afb7-3ecf72e48cfd" +CodecLz4 = "5ba52731-8f18-5e0d-9241-30f10d1ec561" +CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" H5Zblosc = "c8ec2601-a99c-407f-b158-e79c03c2f5f7" @@ -42,6 +53,15 @@ MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +bitshuffle_jll = "228fe19c-1b83-5282-a626-13744502a320" [targets] test = ["Test", "MPI", "Distributed", "LinearAlgebra", "OrderedCollections", "Pkg", "CRC32c", "FileIO"] + +[weakdeps] +Blosc = "a74b3585-a348-5f62-a45c-50e91977d574" +CodecBzip2 = "523fee87-0ab8-5b00-afb7-3ecf72e48cfd" +CodecLz4 = "5ba52731-8f18-5e0d-9241-30f10d1ec561" +CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2" +MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" +bitshuffle_jll = "228fe19c-1b83-5282-a626-13744502a320" diff --git a/docs/Manifest.toml b/docs/Manifest.toml index fe519a269..a5fac6eb9 100644 --- a/docs/Manifest.toml +++ b/docs/Manifest.toml @@ -145,41 +145,46 @@ version = "2.46.2+0" deps = ["HDF5", "bitshuffle_jll"] path = "../filters/H5Zbitshuffle" uuid = 
"51b4e782-877f-4ccf-958a-27bf628210da" -version = "0.1.3" +version = "0.2.0" [[deps.H5Zblosc]] deps = ["Blosc", "HDF5"] path = "../filters/H5Zblosc" uuid = "c8ec2601-a99c-407f-b158-e79c03c2f5f7" -version = "0.1.2" +version = "0.2.0" [[deps.H5Zbzip2]] deps = ["CodecBzip2", "HDF5"] path = "../filters/H5Zbzip2" uuid = "094576f2-1e46-4c84-8e32-c46c042eaaa2" -version = "0.1.2" +version = "0.2.0" [[deps.H5Zlz4]] deps = ["CodecLz4", "HDF5"] path = "../filters/H5Zlz4" uuid = "eb20ec05-5464-47b5-ba41-098e3c1068a3" -version = "0.1.1" +version = "0.2.0" [[deps.H5Zzstd]] deps = ["CodecZstd", "HDF5"] path = "../filters/H5Zzstd" uuid = "f6f2d980-1ec6-471c-a70d-0270e22f1103" -version = "0.1.2" +version = "0.2.0" [[deps.HDF5]] deps = ["Compat", "HDF5_jll", "Libdl", "MPIPreferences", "Mmap", "Preferences", "Printf", "Random", "Requires", "UUIDs"] path = ".." uuid = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" version = "0.17.1" -weakdeps = ["MPI"] +weakdeps = ["Blosc", "CodecBzip2", "CodecLz4", "CodecZstd", "MPI", "bitshuffle_jll"] [deps.HDF5.extensions] + BloscExt = "Blosc" + CodecBzip2Ext = "CodecBzip2" + CodecLz4Ext = "CodecLz4" + CodecZstdExt = "CodecZstd" MPIExt = "MPI" + bitshuffle_jll_ext = "bitshuffle_jll" [[deps.HDF5_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LLVMOpenMP_jll", "LazyArtifacts", "LibCURL_jll", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "OpenSSL_jll", "TOML", "Zlib_jll", "libaec_jll"] diff --git a/docs/make.jl b/docs/make.jl index a989373d8..77041b2ca 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -4,13 +4,34 @@ using H5Zblosc using H5Zbzip2 using H5Zlz4 using H5Zzstd +using H5Zbitshuffle using MPI # needed to generate docs for parallel HDF5 API +# Load extension packages +const BloscExt = Base.get_extension(HDF5, :BloscExt) +const bitshuffle_jll_ext = Base.get_extension(HDF5, :bitshuffle_jll_ext) +const BloscExt = Base.get_extension(HDF5, :BloscExt) +const CodecBzip2Ext = 
Base.get_extension(HDF5, :CodecBzip2Ext) +const CodecLz4Ext = Base.get_extension(HDF5, :CodecLz4Ext) +const CodecZstdExt = Base.get_extension(HDF5, :CodecZstdExt) + DocMeta.setdocmeta!(HDF5, :DocTestSetup, :(using HDF5); recursive=true) makedocs(; sitename="HDF5.jl", - modules=[HDF5, H5Zblosc, H5Zbzip2, H5Zlz4, H5Zzstd], + modules=[ + HDF5, + H5Zblosc, + H5Zbzip2, + H5Zlz4, + H5Zzstd, + H5Zbitshuffle, + bitshuffle_jll_ext, + BloscExt, + CodecBzip2Ext, + CodecLz4Ext, + CodecZstdExt + ], authors="Mustafa Mohamad and contributors", format=Documenter.HTML(; prettyurls=get(ENV, "CI", "false") == "true", diff --git a/docs/src/interface/filters.md b/docs/src/interface/filters.md index 99611a677..7367ea177 100644 --- a/docs/src/interface/filters.md +++ b/docs/src/interface/filters.md @@ -39,12 +39,29 @@ Several external Julia packages implement HDF5 filter plugins in Julia. As they are independent of HDF5.jl, they must be installed in order to use their plugins. The +[H5Zbitshuffle.jl](https://github.com/JuliaIO/HDF5.jl/tree/master/filters/H5Zbitshuffle), [H5Zblosc.jl](https://github.com/JuliaIO/HDF5.jl/tree/master/filters/H5Zblosc), [H5Zbzip2.jl](https://github.com/JuliaIO/HDF5.jl/tree/master/filters/H5Zbzip2), [H5Zlz4.jl](https://github.com/JuliaIO/HDF5.jl/tree/master/filters/H5Zlz4), and [H5Zzstd.jl](https://github.com/JuliaIO/HDF5.jl/tree/master/filters/H5Zzstd) packages are maintained as independent subdirectory packages within the HDF5.jl repository. +### Extension packages + +For Julia 1.9 and greater, the external filter packages listed above at version 0.2 and greater are loose wrappers around extension packages. 
The filter extension packages currently implemented are as follows:
+* bitshuffle_jll_ext
+* BloscExt
+* CodecBzip2Ext
+* CodecLz4Ext
+* CodecZstdExt
+
+```
+using HDF5, CodecZstd
+const CodecZstdExt = Base.get_extension(HDF5, :CodecZstdExt)
+using .CodecZstdExt
+filter = ZstdFilter(5)
+```
+
 ### H5Zblosc.jl
 
 ```@meta
@@ -83,6 +100,19 @@ CurrentModule = H5Zzstd
 
 ```@docs
 ZstdFilter
+H5Zzstd
+```
+
+### H5Zbitshuffle
+
+```@meta
+CurrentModule = H5Zbitshuffle
+```
+
+```@docs
+BitshuffleFilter
+H5Zbitshuffle
+bitshuffle_jll_ext
 ```
 
 ## Other External Filters
diff --git a/ext/BloscExt/BloscExt.jl b/ext/BloscExt/BloscExt.jl
new file mode 100644
index 000000000..3401904bb
--- /dev/null
+++ b/ext/BloscExt/BloscExt.jl
@@ -0,0 +1,223 @@
+module BloscExt
+# port of https://github.com/Blosc/c-blosc/blob/3a668dcc9f61ad22b5c0a0ab45fe8dad387277fd/hdf5/blosc_filter.c (copyright 2010 Francesc Alted, license: MIT/expat)
+
+import Blosc
+using HDF5.API
+import HDF5.Filters: Filter, FilterPipeline
+import HDF5.Filters:
+    filterid,
+    register_filter,
+    filtername,
+    filter_func,
+    filter_cfunc,
+    set_local_func,
+    set_local_cfunc
+import HDF5.Filters.Shuffle
+
+export H5Z_FILTER_BLOSC, blosc_filter, BloscFilter
+
+# Import Blosc shuffle constants
+import Blosc: NOSHUFFLE, SHUFFLE, BITSHUFFLE
+
+const H5Z_FILTER_BLOSC = API.H5Z_filter_t(32001) # Filter ID registered with the HDF Group for Blosc
+const FILTER_BLOSC_VERSION = 2
+const blosc_name = "blosc"
+
+function blosc_set_local(dcpl::API.hid_t, htype::API.hid_t, space::API.hid_t)
+    blosc_flags = Ref{Cuint}()
+    blosc_values = Vector{Cuint}(undef, 8)
+    blosc_nelements = Ref{Csize_t}(length(blosc_values))
+    blosc_chunkdims = Vector{API.hsize_t}(undef, 32)
+
+    API.h5p_get_filter_by_id(
+        dcpl,
+        H5Z_FILTER_BLOSC,
+        blosc_flags,
+        blosc_nelements,
+        blosc_values,
+        0,
+        C_NULL,
+        C_NULL
+    )
+    flags = blosc_flags[]
+
+    nelements = max(blosc_nelements[], 4) # First 4 slots reserved
+
+    # Set Blosc info in first two slots
+    blosc_values[1] = FILTER_BLOSC_VERSION
+    blosc_values[2] = Blosc.VERSION_FORMAT
+
+    ndims = API.h5p_get_chunk(dcpl, 32, blosc_chunkdims)
+    # Reject a bad rank before resize!, which throws on a negative length
+    (ndims < 0 || ndims > 32) && return API.herr_t(-1)
+    chunksize = prod(resize!(blosc_chunkdims, ndims))
+    chunksize > Blosc.MAX_BUFFERSIZE && return API.herr_t(-1)
+
+    htypesize = API.h5t_get_size(htype)
+    if API.h5t_get_class(htype) == API.H5T_ARRAY
+        hsuper = API.h5t_get_super(htype)
+        basetypesize = API.h5t_get_size(hsuper)
+        API.h5t_close(hsuper)
+    else
+        basetypesize = htypesize
+    end
+
+    # Limit large typesizes (they are pretty inefficient to shuffle
+    # and, in addition, Blosc does not handle typesizes larger than
+    # blocksizes).
+    if basetypesize > Blosc.MAX_TYPESIZE
+        basetypesize = 1
+    end
+    blosc_values[3] = basetypesize
+    blosc_values[4] = chunksize * htypesize # size of the chunk
+
+    API.h5p_modify_filter(dcpl, H5Z_FILTER_BLOSC, flags, nelements, blosc_values)
+
+    return API.herr_t(1)
+end
+
+function blosc_filter(
+    flags::Cuint,
+    cd_nelmts::Csize_t,
+    cd_values::Ptr{Cuint},
+    nbytes::Csize_t,
+    buf_size::Ptr{Csize_t},
+    buf::Ptr{Ptr{Cvoid}}
+)
+    typesize = unsafe_load(cd_values, 3) # The datatype size
+    outbuf_size = unsafe_load(cd_values, 4)
+    # Compression level:
+    clevel = cd_nelmts >= 5 ? unsafe_load(cd_values, 5) : Cuint(5)
+    # Do shuffle:
+    doshuffle = cd_nelmts >= 6 ? unsafe_load(cd_values, 6) : SHUFFLE
+
+    if (flags & API.H5Z_FLAG_REVERSE) == 0 # compressing
+        # Allocate an output buffer exactly as long as the input data; if
+        # the result is larger, we simply return 0. The filter is flagged
+        # as optional, so HDF5 marks the chunk as uncompressed and proceeds.
+        outbuf_size = unsafe_load(buf_size)
+        outbuf_size <= 0 && return Csize_t(0)
+        outbuf = Libc.malloc(outbuf_size)
+        outbuf == C_NULL && return Csize_t(0)
+
+        compname = if cd_nelmts >= 7
+            compcode = unsafe_load(cd_values, 7)
+            Blosc.compname(compcode)
+        else
+            "blosclz"
+        end
+        Blosc.set_compressor(compname)
+        status = Blosc.blosc_compress(
+            clevel, doshuffle, typesize, nbytes, unsafe_load(buf), outbuf, nbytes
+        )
+        status < 0 && (Libc.free(outbuf); return Csize_t(0))
+    else # decompressing
+        # Extract the exact outbuf_size from the buffer header.
+        #
+        # NOTE: the guess value got from "cd_values" corresponds to the
+        # uncompressed chunk size but it should not be used in the general
+        # case since other filters in the pipeline can modify the buffer
+        # size.
+        in = unsafe_load(buf)
+        # See https://github.com/JuliaLang/julia/issues/43402
+        # Resolved in https://github.com/JuliaLang/julia/pull/43408
+        outbuf_size, cbytes, blocksize = Blosc.cbuffer_sizes(in)
+        outbuf_size <= 0 && return Csize_t(0)
+        outbuf = Libc.malloc(outbuf_size)
+        outbuf == C_NULL && return Csize_t(0)
+        status = Blosc.blosc_decompress(in, outbuf, outbuf_size)
+        status <= 0 && (Libc.free(outbuf); return Csize_t(0))
+    end
+
+    if status != 0
+        Libc.free(unsafe_load(buf))
+        unsafe_store!(buf, outbuf)
+        unsafe_store!(buf_size, outbuf_size)
+        return Csize_t(status) # size of compressed/decompressed data
+    end
+    Libc.free(outbuf)
+    return Csize_t(0)
+end
+
+"""
+    BloscFilter(;level=5, shuffle=SHUFFLE, compressor="blosclz")
+
+The Blosc compression filter, using [Blosc.jl](https://github.com/JuliaIO/Blosc.jl). Options:
+
+  - `level`: compression level, an integer from 0 to 9
+  - `shuffle`: shuffle mode applied before compressing, one of `NOSHUFFLE`, `SHUFFLE` or `BITSHUFFLE` (this option should be used instead of the [`Shuffle`](@ref) filter)
+  - `compressor`: the compression algorithm. Call `Blosc.compressors()` for the available compressors.
+
+# External links
+* [What Is Blosc?](https://www.blosc.org/pages/blosc-in-depth/)
+* [Blosc HDF5 Filter ID 32001](https://portal.hdfgroup.org/display/support/Filters#Filters-32001)
+* [Blosc HDF5 Plugin Repository (C code)](https://github.com/Blosc/hdf5-blosc)
+"""
+struct BloscFilter <: Filter
+    blosc_version::Cuint
+    version_format::Cuint
+    typesize::Cuint
+    bufsize::Cuint
+    level::Cuint
+    shuffle::Cuint
+    compcode::Cuint
+end
+
+function BloscFilter(; level=5, shuffle=SHUFFLE, compressor="blosclz")
+    Blosc.isvalidshuffle(shuffle) || throw(ArgumentError("invalid blosc shuffle $shuffle"))
+    compcode = Blosc.compcode(compressor)
+    BloscFilter(0, 0, 0, 0, level, shuffle, compcode)
+end
+
+filterid(::Type{BloscFilter}) = H5Z_FILTER_BLOSC
+filtername(::Type{BloscFilter}) = blosc_name
+set_local_func(::Type{BloscFilter}) = blosc_set_local
+set_local_cfunc(::Type{BloscFilter}) =
+    @cfunction(blosc_set_local, API.herr_t, (API.hid_t, API.hid_t, API.hid_t))
+filter_func(::Type{BloscFilter}) = blosc_filter
+filter_cfunc(::Type{BloscFilter}) = @cfunction(
+    blosc_filter,
+    Csize_t,
+    (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}})
+)
+
+function Base.show(io::IO, blosc::BloscFilter)
+    print(
+        io,
+        BloscFilter,
+        "(level=",
+        Int(blosc.level),
+        ",shuffle=",
+        blosc.shuffle == NOSHUFFLE ? "NOSHUFFLE" :
+        blosc.shuffle == SHUFFLE ? "SHUFFLE" :
+        blosc.shuffle == BITSHUFFLE ? "BITSHUFFLE" :
+        "UNKNOWN",
+        ",compressor=",
+        Blosc.compname(blosc.compcode),
+        ")"
+    )
+end
+
+function Base.push!(f::FilterPipeline, blosc::BloscFilter)
+    0 <= blosc.level <= 9 ||
+        throw(ArgumentError("blosc compression $(blosc.level) not in [0,9]"))
+    Blosc.isvalidshuffle(blosc.shuffle) ||
+        throw(ArgumentError("invalid blosc shuffle $(blosc.shuffle)"))
+    ref = Ref(blosc)
+    GC.@preserve ref begin
+        API.h5p_set_filter(
+            f.plist,
+            filterid(BloscFilter),
+            API.H5Z_FLAG_OPTIONAL,
+            div(sizeof(BloscFilter), sizeof(Cuint)),
+            pointer_from_objref(ref)
+        )
+    end
+    return f
+end
+
+function __init__()
+    register_filter(BloscFilter)
+end
+
+end # module BloscExt
diff --git a/filters/H5Zblosc/LICENSE.txt b/ext/BloscExt/LICENSE.txt
similarity index 100%
rename from filters/H5Zblosc/LICENSE.txt
rename to ext/BloscExt/LICENSE.txt
diff --git a/ext/CodecBzip2Ext/CodecBzip2Ext.jl b/ext/CodecBzip2Ext/CodecBzip2Ext.jl
new file mode 100644
index 000000000..055abb1ba
--- /dev/null
+++ b/ext/CodecBzip2Ext/CodecBzip2Ext.jl
@@ -0,0 +1,239 @@
+#=
+The code below has been ported to Julia from the original C source:
+https://github.com/nexusformat/HDF5-External-Filter-Plugins/blob/master/BZIP2/src/H5Zbzip2.c
+The filter function H5Z_filter_bzip2 was adopted from:
+PyTables http://www.pytables.org.
+The plugin can be used with the HDF5 library version 1.8.11+ to read HDF5 datasets compressed with bzip2 created by PyTables.
+License: licenses/H5Zbzip2_LICENSE.txt
+
+The following license applies to the Julia port.
+Copyright (c) 2021 Mark Kittisopikul and Howard Hughes Medical Institute.
License MIT, see LICENSE.txt
+=#
+module CodecBzip2Ext
+
+using CodecBzip2
+import CodecBzip2: libbzip2
+using HDF5.API
+import HDF5.Filters:
+    Filter, filterid, register_filter, filtername, filter_func, filter_cfunc
+
+export H5Z_FILTER_BZIP2, H5Z_filter_bzip2, Bzip2Filter
+
+const H5Z_FILTER_BZIP2 = API.H5Z_filter_t(307)
+const bzip2_name = "HDF5 bzip2 filter; see http://www.hdfgroup.org/services/contributions.html"
+
+function H5Z_filter_bzip2(
+    flags::Cuint,
+    cd_nelmts::Csize_t,
+    cd_values::Ptr{Cuint},
+    nbytes::Csize_t,
+    buf_size::Ptr{Csize_t},
+    buf::Ptr{Ptr{Cvoid}}
+)::Csize_t
+    outbuf = C_NULL
+    outdatalen = Cuint(0)
+
+    # outbuf stays C_NULL until a malloc succeeds, so the catch block knows whether to free it
+
+    try
+        if flags & API.H5Z_FLAG_REVERSE != 0
+            # Decompress
+
+            outbuflen = nbytes * 3 + 1
+            outbuflen <= 0 &&
+                error("H5Zbzip2: Non-positive outbuflen for malloc: $outbuflen.")
+            outbuf = Libc.malloc(outbuflen)
+            if outbuf == C_NULL
+                error("H5Zbzip2: memory allocation failed for bzip2 decompression.")
+            end
+
+            stream = CodecBzip2.BZStream()
+            # Just use default malloc and free
+            stream.bzalloc = C_NULL
+            stream.bzfree = C_NULL
+            # BZ2_bzDecompressInit
+            ret = CodecBzip2.decompress_init!(stream, 0, false)
+            if ret != CodecBzip2.BZ_OK
+                error("H5Zbzip2: bzip2 decompress start failed with error $ret.")
+            end
+
+            stream.next_out = outbuf
+            stream.avail_out = outbuflen
+            stream.next_in = unsafe_load(buf)
+            stream.avail_in = nbytes
+
+            cont = true
+
+            while cont
+                # BZ2_bzDecompress
+                ret = CodecBzip2.decompress!(stream)
+                if ret < 0
+                    error("H5Zbzip2: bzip2 decompression failed with error $ret.")
+                end
+                cont = ret != CodecBzip2.BZ_STREAM_END
+                if cont && stream.avail_out == 0
+                    # Grow the output buffer
+                    newbuflen = outbuflen * 2
+                    newbuf = Libc.realloc(outbuf, newbuflen)
+                    if newbuf == C_NULL
+                        error("H5Zbzip2: memory allocation failed for bzip2 decompression.")
+                    end
+                    stream.next_out = newbuf + outbuflen
+                    stream.avail_out = outbuflen
+                    outbuf = newbuf
+                    outbuflen = newbuflen
+                end
+            end
+
+            outdatalen = stream.total_out_lo32
+            # BZ2_bzDecompressEnd
+            ret = CodecBzip2.decompress_end!(stream)
+            if ret != CodecBzip2.BZ_OK
+                error("H5Zbzip2: bzip2 decompression end failed with error $ret.")
+            end
+        else
+            # Compress data
+
+            # Maybe not the same size as outdatalen
+            odatalen = Cuint(0)
+            blockSize100k = 9
+
+            # Get compression blocksize if present
+            if cd_nelmts > 0
+                blockSize100k = unsafe_load(cd_values)
+                if blockSize100k < 1 || blockSize100k > 9
+                    error("H5Zbzip2: Invalid compression blocksize: $blockSize100k")
+                end
+            end
+
+            # Prepare the output buffer
+            outbuflen = nbytes + nbytes ÷ 100 + 600 # worse case (bzip2 docs)
+            outbuflen <= 0 &&
+                error("H5Zbzip2: Non-positive outbuflen for malloc: $outbuflen.")
+            outbuf = Libc.malloc(outbuflen)
+            @debug "Allocated" outbuflen outbuf
+            if outbuf == C_NULL
+                error("H5Zbzip2: Memory allocation failed for bzip2 compression")
+            end
+
+            # Compress data
+            odatalen = outbuflen
+            r_odatalen = Ref{Cuint}(odatalen)
+            ret = BZ2_bzBuffToBuffCompress(
+                outbuf, r_odatalen, unsafe_load(buf), nbytes, blockSize100k, 0, 0
+            )
+            outdatalen = r_odatalen[]
+            if ret != CodecBzip2.BZ_OK
+                error("H5Zbzip2: bzip2 compression failed with error $ret.")
+            end
+        end # if flags & API.H5Z_FLAG_REVERSE != 0
+        Libc.free(unsafe_load(buf))
+        unsafe_store!(buf, outbuf)
+        unsafe_store!(buf_size, outbuflen)
+
+    catch err
+        # "In the case of failure, the return value is 0 (zero) and all pointer arguments are left unchanged."
+        outdatalen = Csize_t(0)
+        if outbuf != C_NULL
+            Libc.free(outbuf)
+        end
+        @error "H5Zbzip2.jl Non-Fatal ERROR: " err
+        display(stacktrace(catch_backtrace()))
+    end # try - catch
+
+    return Csize_t(outdatalen)
+end # function H5Z_filter_bzip2
+
+# Need stdcall for 32-bit Windows?
+function BZ2_bzBuffToBuffCompress(
+    dest, destLen, source, sourceLen, blockSize100k, verbosity, workFactor
+)
+    @static if CodecBzip2.WIN32
+        return ccall(
+            ("BZ2_bzBuffToBuffCompress@28", libbzip2),
+            stdcall,
+            Cint,
+            (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint, Cint),
+            dest,
+            destLen,
+            source,
+            sourceLen,
+            blockSize100k,
+            verbosity,
+            workFactor
+        )
+    else
+        return ccall(
+            (:BZ2_bzBuffToBuffCompress, libbzip2),
+            Cint,
+            (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint, Cint),
+            dest,
+            destLen,
+            source,
+            sourceLen,
+            blockSize100k,
+            verbosity,
+            workFactor
+        )
+    end
+end
+
+function BZ2_bzBuffToBuffDecompress(dest, destLen, source, sourceLen, small, verbosity)
+    @static if CodecBzip2.WIN32
+        return ccall(
+            ("BZ2_bzBuffToBuffDecompress@24", libbzip2),
+            stdcall,
+            Cint,
+            (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint),
+            dest,
+            destLen,
+            source,
+            sourceLen,
+            small,
+            verbosity
+        )
+    else
+        return ccall(
+            (:BZ2_bzBuffToBuffDecompress, libbzip2),
+            Cint,
+            (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint),
+            dest,
+            destLen,
+            source,
+            sourceLen,
+            small,
+            verbosity
+        )
+    end
+end
+
+# Filters Module
+
+"""
+    Bzip2Filter(blockSize100k)
+
+Apply Bzip2 compression. The filter id is $H5Z_FILTER_BZIP2.
+
+# External Links
+* [BZIP2 HDF5 Filter ID 307](https://portal.hdfgroup.org/display/support/Filters#Filters-307)
+* [PyTables Repository (C code)](https://github.com/PyTables/PyTables)
+"""
+struct Bzip2Filter <: Filter
+    blockSize100k::Cuint
+end
+Bzip2Filter() = Bzip2Filter(9)
+
+filterid(::Type{Bzip2Filter}) = H5Z_FILTER_BZIP2
+filtername(::Type{Bzip2Filter}) = bzip2_name
+filter_func(::Type{Bzip2Filter}) = H5Z_filter_bzip2
+filter_cfunc(::Type{Bzip2Filter}) = @cfunction(
+    H5Z_filter_bzip2,
+    Csize_t,
+    (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}})
+)
+
+function __init__()
+    register_filter(Bzip2Filter)
+end
+
+end # module CodecBzip2Ext
diff --git a/filters/H5Zbzip2/LICENSE.txt b/ext/CodecBzip2Ext/LICENSE.txt
similarity index 100%
rename from filters/H5Zbzip2/LICENSE.txt
rename to ext/CodecBzip2Ext/LICENSE.txt
diff --git a/ext/CodecBzip2Ext/README.md b/ext/CodecBzip2Ext/README.md
new file mode 100644
index 000000000..3d767db59
--- /dev/null
+++ b/ext/CodecBzip2Ext/README.md
@@ -0,0 +1,6 @@
+# CodecBzip2Ext
+
+Implements the Bzip2 filter for [HDF5.jl](https://github.com/JuliaIO/HDF5.jl) in Julia.
+See the [documentation](https://juliaio.github.io/HDF5.jl/stable/filters/#H5Zbzip2.jl)
+
+This implements [HDF5 registered filter id 307](https://portal.hdfgroup.org/display/support/Filters#Filters-307).
\ No newline at end of file
diff --git a/filters/H5Zbzip2/THIRDPARTY.txt b/ext/CodecBzip2Ext/THIRDPARTY.txt
similarity index 100%
rename from filters/H5Zbzip2/THIRDPARTY.txt
rename to ext/CodecBzip2Ext/THIRDPARTY.txt
diff --git a/ext/CodecLz4Ext/CodecLz4Ext.jl b/ext/CodecLz4Ext/CodecLz4Ext.jl
new file mode 100644
index 000000000..1070ab0ad
--- /dev/null
+++ b/ext/CodecLz4Ext/CodecLz4Ext.jl
@@ -0,0 +1,236 @@
+#=
+This is a port of H5Zlz4.c to Julia
+https://github.com/HDFGroup/hdf5_plugins/blob/master/LZ4/src/H5Zlz4.c
+https://github.com/nexusformat/HDF5-External-Filter-Plugins/blob/master/LZ4/src/H5Zlz4.c
+https://github.com/silx-kit/hdf5plugin/blob/main/src/LZ4/H5Zlz4.c
+
+H5Zlz4 is originally a copyright of HDF Group. License: licenses/H5Zlz4_LICENSE.txt
+
+The following license applies to the Julia port.
+Copyright (c) 2021 Mark Kittisopikul and Howard Hughes Medical Institute. License MIT, see LICENSE.txt
+=#
+module CodecLz4Ext
+
+using CodecLz4
+using HDF5.API
+import HDF5.Filters:
+    Filter, filterid, register_filter, filtername, filter_func, filter_cfunc
+
+export H5Z_FILTER_LZ4, H5Z_filter_lz4, Lz4Filter
+
+const H5Z_FILTER_LZ4 = API.H5Z_filter_t(32004)
+
+const DEFAULT_BLOCK_SIZE = 1 << 30
+const lz4_name = "HDF5 lz4 filter; see http://www.hdfgroup.org/services/contributions.html"
+
+const LZ4_AGGRESSION = Ref(1)
+
+# flags H5Z_FLAG_REVERSE or H5Z_FLAG_OPTIONAL
+# cd_nelmts number of elements in cd_values (0 or 1)
+# cd_values the first optional element must be the blockSize
+# nbytes - number of valid bytes of data
+# buf_size - total size of buffer
+# buf - pointer to pointer of data
+function H5Z_filter_lz4(
+    flags::Cuint,
+    cd_nelmts::Csize_t,
+    cd_values::Ptr{Cuint},
+    nbytes::Csize_t,
+    buf_size::Ptr{Csize_t},
+    buf::Ptr{Ptr{Cvoid}}
+)::Csize_t
+    outBuf = C_NULL
+    ret_value = Csize_t(0)
+
+    try
+        if (flags & API.H5Z_FLAG_REVERSE) != 0 # reverse filter, decompressing
+            # Input layout: 8-byte original size, 4-byte block size, then the blocks
+            blockSize = UInt32(0)
+            roBuf = Ref{UInt8}()
+            rpos = Ptr{UInt8}(unsafe_load(buf))
+            # rpos is the read cursor into the compressed input
+            # Load the first 8 bytes from buffer as a big endian UInt64
+            # This is the original size of the buffer
+            origSize = ntoh(unsafe_load(Ptr{UInt64}(rpos)))
+            rpos += 8 # advance the pointer
+
+            # Next read the next four bytes from the buffer as a big endian UInt32
+            # This is the blocksize
+            # (clamped below so it never exceeds the original size)
+            blockSize = ntoh(unsafe_load(Ptr{UInt32}(rpos)))
+            rpos += 4
+            if blockSize > origSize
+                blockSize = origSize
+            end
+
+            # malloc a byte buffer of origSize
+            # (allocated with Libc.malloc because it is handed back through `buf` below)
+            @debug "OrigSize" origSize
+            origSize <= 0 && error("H5Zlz4: Non-positive origSize for malloc: $origSize")
+            outBuf = Libc.malloc(origSize)
+            outBuf == C_NULL && error("H5Zlz4: Could not allocate memory via malloc")
+            # roBuf is the write cursor into the output buffer
+            roBuf = Ptr{UInt8}(outBuf)
+            decompSize = 0
+            # Start with the first blockSize
+            while decompSize < origSize
+                # One iteration per stored block
+                if origSize - decompSize < blockSize # the last block can be smaller than block size
+                    blockSize = origSize - decompSize
+                end
+
+                # Each block is prefixed by its stored size as a big endian UInt32
+                compressedBlockSize = ntoh(unsafe_load(Ptr{UInt32}(rpos)))
+                rpos += 4
+
+                if compressedBlockSize == blockSize
+                    # There was no compression
+                    # memcpy(roBuf, rpos, blockSize)
+                    unsafe_copyto!(roBuf, rpos, blockSize)
+                    decompressedBytes = blockSize
+                else
+                    # The block is LZ4-compressed: decompress it into the output buffer
+                    @debug "decompress_safe" rpos roBuf compressedBlockSize (
+                        origSize - decompSize
+                    )
+                    decompressedBytes = CodecLz4.LZ4_decompress_safe(
+                        rpos, roBuf, compressedBlockSize, origSize - decompSize
+                    )
+                    @debug "decompressedBytes" decompressedBytes
+                    decompressedBytes < 0 && error("H5Zlz4: LZ4 decompression failed")
+                end
+
+                rpos += compressedBlockSize
+                roBuf += blockSize
+                decompSize += decompressedBytes
+            end
+            Libc.free(unsafe_load(buf))
+            unsafe_store!(buf, outBuf)
+            outBuf = C_NULL
+            ret_value = Csize_t(origSize)
+        else
+            # forward filter
+            # compressing
+            # Output layout mirrors what the reverse branch reads: a 12-byte header,
+            # then one (4-byte size, payload) record per block
+
+            if nbytes > typemax(Int32)
+                error("Can only compress chunks up to 2GB")
+            end
+            # Only dereference cd_values when a block size was actually passed
+            blockSize = cd_nelmts > 0 ? unsafe_load(cd_values) : Cuint(0)
+            if blockSize == 0
+                blockSize = DEFAULT_BLOCK_SIZE
+            end
+            if blockSize > nbytes
+                blockSize = nbytes
+            end
+            nBlocks = (nbytes - 1) ÷ blockSize + 1
+            maxDestSize =
+                nBlocks * CodecLz4.LZ4_compressBound(blockSize) + 4 + 8 + nBlocks * 4
+            maxDestSize <= 0 &&
+                error("H5Zlz4: Non-positive maxDestSize for malloc: $maxDestSize")
+            outBuf = Libc.malloc(maxDestSize)
+            outBuf == C_NULL && error("H5Zlz4: Could not allocate memory via malloc")
+
+            rpos = Ptr{UInt8}(unsafe_load(buf))
+            roBuf = Ptr{UInt8}(outBuf)
+
+            # Header
+            unsafe_store!(Ptr{UInt64}(roBuf), hton(UInt64(nbytes)))
+            roBuf += 8
+
+            unsafe_store!(Ptr{UInt32}(roBuf), hton(UInt32(blockSize)))
+            roBuf += 4
+
+            outSize = 12
+
+            for block in 0:(nBlocks - 1)
+                # Offset of this block within the uncompressed input
+                origWritten = Csize_t(block * blockSize)
+                if nbytes - origWritten < blockSize # the last block may be < blockSize
+                    blockSize = nbytes - origWritten
+                end
+
+                # aggression = 1 is the same LZ4_compress_default
+                @debug "LZ4_compress_fast args" rpos outBuf roBuf roBuf + 4 blockSize nBlocks CodecLz4.LZ4_compressBound(
+                    blockSize
+                )
+                compBlockSize = UInt32(
+                    CodecLz4.LZ4_compress_fast(
+                        rpos,
+                        roBuf + 4,
+                        blockSize,
+                        CodecLz4.LZ4_compressBound(blockSize),
+                        LZ4_AGGRESSION[]
+                    )
+                )
+                @debug "Compressed block size" compBlockSize
+
+                if compBlockSize == 0
+                    error("Could not compress block $block")
+                end
+
+                if compBlockSize >= blockSize # compression did not save any space, do a memcpy instead
+                    compBlockSize = blockSize
+                    unsafe_copyto!(roBuf + 4, rpos, blockSize)
+                end
+
+                unsafe_store!(Ptr{UInt32}(roBuf), hton(UInt32(compBlockSize))) # write blocksize
+                roBuf += 4
+
+                rpos += blockSize
+                roBuf += compBlockSize
+                outSize += compBlockSize + 4
+            end
+
+            Libc.free(unsafe_load(buf))
+            unsafe_store!(buf, outBuf)
+            unsafe_store!(buf_size, outSize)
+            outBuf = C_NULL
+            ret_value = Csize_t(outSize)
+        end # (flags & API.H5Z_FLAG_REVERSE) != 0
+
+    catch err
+        # "In the case of failure, the return value is 0 (zero) and all pointer arguments are left unchanged."
+        ret_value = Csize_t(0)
+        @async @error "H5Zlz4.jl Non-Fatal ERROR: " err
+        display(stacktrace(catch_backtrace()))
+    finally
+        if outBuf != C_NULL
+            Libc.free(outBuf)
+        end
+    end
+    return Csize_t(ret_value)
+end
+
+# Filters Module
+
+"""
+    Lz4Filter(blockSize)
+
+Apply LZ4 compression. `blockSize` is the main argument. The filter id is $H5Z_FILTER_LZ4.
+
+# External Links
+* [LZ4 HDF5 Filter ID 32004](https://portal.hdfgroup.org/display/support/Filters#Filters-32004)
+* [LZ4 HDF5 Plugin Repository (C code)](https://github.com/nexusformat/HDF5-External-Filter-Plugins/tree/master/LZ4)
+"""
+struct Lz4Filter <: Filter
+    blockSize::Cuint
+end
+Lz4Filter() = Lz4Filter(DEFAULT_BLOCK_SIZE)
+
+filterid(::Type{Lz4Filter}) = H5Z_FILTER_LZ4
+filtername(::Type{Lz4Filter}) = lz4_name
+filter_func(::Type{Lz4Filter}) = H5Z_filter_lz4
+filter_cfunc(::Type{Lz4Filter}) = @cfunction(
+    H5Z_filter_lz4,
+    Csize_t,
+    (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}})
+)
+
+function __init__()
+    register_filter(Lz4Filter)
+end
+
+end # module CodecLz4Ext
diff --git a/filters/H5Zlz4/LICENSE.txt b/ext/CodecLz4Ext/LICENSE.txt
similarity index 100%
rename from filters/H5Zlz4/LICENSE.txt
rename to ext/CodecLz4Ext/LICENSE.txt
diff --git a/ext/CodecLz4Ext/README.md b/ext/CodecLz4Ext/README.md
new file mode 100644
index 000000000..e484d68bc
--- /dev/null
+++ b/ext/CodecLz4Ext/README.md
@@ -0,0 +1,6 @@
+# CodecLz4Ext
+
+Implements the LZ4 filter for [HDF5.jl](https://github.com/JuliaIO/HDF5.jl) in Julia.
+See the [documentation](https://juliaio.github.io/HDF5.jl/stable/filters/#H5Zlz4.jl)
+
+This implements [HDF5 registered filter id 32004](https://portal.hdfgroup.org/display/support/Filters#Filters-32004).
diff --git a/filters/H5Zlz4/THIRDPARTY.txt b/ext/CodecLz4Ext/THIRDPARTY.txt similarity index 95% rename from filters/H5Zlz4/THIRDPARTY.txt rename to ext/CodecLz4Ext/THIRDPARTY.txt index 745176c31..00acf0a3c 100644 --- a/filters/H5Zlz4/THIRDPARTY.txt +++ b/ext/CodecLz4Ext/THIRDPARTY.txt @@ -1,4 +1,4 @@ -H5Zlz4.jl is derived from H5Zlz4.c from HDF5 Group +CodecLz4Ext.jl is derived from H5Zlz4.c from HDF5 Group ================================================================================ LZ4 filter plugin license @@ -42,4 +42,4 @@ THIS SOFTWARE IS PROVIDED BY THE HDF GROUP AND THE CONTRIBUTORS "AS IS" WITH NO WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED. In no event shall The HDF Group or the Contributors be liable for any damages suffered by the users arising out of the use of this software, even if -advised of the possibility of such damage. \ No newline at end of file +advised of the possibility of such damage. diff --git a/ext/CodecZstdExt/CodecZstdExt.jl b/ext/CodecZstdExt/CodecZstdExt.jl new file mode 100644 index 000000000..ebb62feea --- /dev/null +++ b/ext/CodecZstdExt/CodecZstdExt.jl @@ -0,0 +1,128 @@ +#= +Derived from https://github.com/aparamon/HDF5Plugin-Zstandard, zstd_h5plugin.c +Licensed under Apache License Version 2.0, see licenses/H5Zzstd_LICENSE.txt + +The following license applies to the Julia port. +Copyright (c) 2021 Mark Kittisopikul and Howard Hughes Medical Institute. 
module CodecZstdExt

using CodecZstd
import CodecZstd.LibZstd
using HDF5.API
import HDF5.Filters:
    Filter, filterid, register_filter, filtername, filter_func, filter_cfunc

# Filter ID registered with the HDF Group for Zstandard.
const H5Z_FILTER_ZSTD = API.H5Z_filter_t(32015)
const zstd_name = "Zstandard compression: http://www.zstd.net"

export H5Z_filter_zstd, H5Z_FILTER_ZSTD, ZstdFilter

"""
    H5Z_filter_zstd(flags, cd_nelmts, cd_values, nbytes, buf_size, buf)

HDF5 filter callback for Zstandard.

When `flags` has `API.H5Z_FLAG_REVERSE` set, the `nbytes` bytes at `buf[]` are
decompressed; otherwise they are compressed. On success the input buffer is freed,
`buf` and `buf_size` are updated to describe the newly allocated output buffer, and
the number of valid output bytes is returned.

The first entry of `cd_values` (if `cd_nelmts > 0`) is the compression level; it is
clamped to `1:ZSTD_maxCLevel()`. Without it `ZSTD_CLEVEL_DEFAULT` is used.

On any failure `0` is returned and `buf`/`buf_size` are left untouched.
"""
function H5Z_filter_zstd(
    flags::Cuint,
    cd_nelmts::Csize_t,
    cd_values::Ptr{Cuint},
    nbytes::Csize_t,
    buf_size::Ptr{Csize_t},
    buf::Ptr{Ptr{Cvoid}}
)::Csize_t
    inbuf = unsafe_load(buf)
    outbuf = C_NULL
    origSize = nbytes
    ret_value = Csize_t(0)

    try
        if flags & API.H5Z_FLAG_REVERSE != 0
            # decompression

            allocSize = LibZstd.ZSTD_getDecompressedSize(inbuf, origSize)
            if allocSize == 0
                error("zstd_h5plugin: Cannot retrieve decompressed chunk size")
            end
            outbuf = Libc.malloc(allocSize)
            if outbuf == C_NULL
                error(
                    "zstd_h5plugin: Cannot allocate memory for outbuf during decompression."
                )
            end
            decompSize = LibZstd.ZSTD_decompress(outbuf, allocSize, inbuf, origSize)
            # The return value is either a byte count or an error code; check
            # before touching any of the caller's pointers.
            if LibZstd.ZSTD_isError(decompSize) != 0
                error("zstd_h5plugin: Decompression failed")
            end

            Libc.free(inbuf)
            unsafe_store!(buf, outbuf)
            # Report the size of the buffer we just handed over.
            unsafe_store!(buf_size, Csize_t(allocSize))
            outbuf = C_NULL
            ret_value = Csize_t(decompSize)
        else
            # compression

            aggression = if cd_nelmts > 0
                Cint(unsafe_load(cd_values))
            else
                LibZstd.ZSTD_CLEVEL_DEFAULT
            end
            # Lower bound 1 stands in for ZSTD_minCLevel()
            aggression = clamp(aggression, 1, LibZstd.ZSTD_maxCLevel())

            compBound = LibZstd.ZSTD_compressBound(origSize)
            outbuf = Libc.malloc(compBound)
            if outbuf == C_NULL
                error(
                    "zstd_h5plugin: Cannot allocate memory for outbuf during compression."
                )
            end

            compSize =
                LibZstd.ZSTD_compress(outbuf, compBound, inbuf, origSize, aggression)
            if LibZstd.ZSTD_isError(compSize) != 0
                error("zstd_h5plugin: Compression failed")
            end

            Libc.free(inbuf)
            unsafe_store!(buf, outbuf)
            unsafe_store!(buf_size, compSize)
            outbuf = C_NULL
            ret_value = Csize_t(compSize)
        end
    catch e
        # "In the case of failure, the return value is 0 (zero) and all pointer arguments are left unchanged."
        ret_value = Csize_t(0)
        # The backtrace must be captured here: inside the async task there is no
        # current exception, so catch_backtrace() would come back empty.
        bt = catch_backtrace()
        # Output Julia error via async so we do not task switch during callback
        @async @error "H5Zzstd Non-Fatal ERROR: " exception = (e, bt)
    finally
        # Only non-NULL if we allocated but did not hand the buffer to HDF5.
        if outbuf != C_NULL
            Libc.free(outbuf)
        end
    end # try catch finally
    return Csize_t(ret_value)
end

# Filters Module

"""
    ZstdFilter(clevel)

Zstandard compression filter. `clevel` determines the compression level.

# External Links
* [Zstandard HDF5 Filter ID 32015](https://portal.hdfgroup.org/display/support/Filters#Filters-32015)
* [Zstandard HDF5 Plugin Repository (C code)](https://github.com/aparamon/HDF5Plugin-Zstandard)
"""
struct ZstdFilter <: Filter
    clevel::Cuint
end
ZstdFilter() = ZstdFilter(CodecZstd.LibZstd.ZSTD_CLEVEL_DEFAULT)

filterid(::Type{ZstdFilter}) = H5Z_FILTER_ZSTD
filtername(::Type{ZstdFilter}) = zstd_name
filter_func(::Type{ZstdFilter}) = H5Z_filter_zstd
filter_cfunc(::Type{ZstdFilter}) = @cfunction(
    H5Z_filter_zstd,
    Csize_t,
    (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}})
)

function __init__()
    register_filter(ZstdFilter)
end

end # module CodecZstdExt
"6b39b394-51ab-5f42-8807-6242bab2b4c2" +HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" + +[compat] +HDF5 = "0.17" +CodecZstd = "0.7, 0.8" +julia = "1.3" diff --git a/ext/CodecZstdExt/README.md b/ext/CodecZstdExt/README.md new file mode 100644 index 000000000..e4e54e8cc --- /dev/null +++ b/ext/CodecZstdExt/README.md @@ -0,0 +1,8 @@ +# CodecZstdExt + +Implements the Zstd filter for [HDF5.jl](https://github.com/JuliaIO/HDF5.jl) in Julia. +See the [documentation](https://juliaio.github.io/HDF5.jl/stable/filters/#H5Zzstd.jl) + +This implements [HDF5 ZStandard Filter 32015](https://portal.hdfgroup.org/display/support/Filters#Filters-32015) + +The contents of this package is derived from H5Zzstd.jl diff --git a/filters/H5Zzstd/THIRDPARTY.txt b/ext/CodecZstdExt/THIRDPARTY.txt similarity index 100% rename from filters/H5Zzstd/THIRDPARTY.txt rename to ext/CodecZstdExt/THIRDPARTY.txt diff --git a/ext/bitshuffle_jll_ext/LICENSE.txt b/ext/bitshuffle_jll_ext/LICENSE.txt new file mode 100644 index 000000000..281fd00a6 --- /dev/null +++ b/ext/bitshuffle_jll_ext/LICENSE.txt @@ -0,0 +1,23 @@ +H5Zbitshuffle - Julia wrapping of bitshuffle HDF5 Filter for improving +compression of typed binary data. + +Copyright (c) Australian Nuclear Science and Technology Organisation +2022 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/ext/bitshuffle_jll_ext/README.md b/ext/bitshuffle_jll_ext/README.md new file mode 100644 index 000000000..201f1eca9 --- /dev/null +++ b/ext/bitshuffle_jll_ext/README.md @@ -0,0 +1,6 @@ +# bitshuffle_jll_ext + +Implements the bitshuffle filter for [HDF5.jl](https://github.com/JuliaIO/HDF5.jl) in Julia, +with optional integrated lz4 and zstd (de)compression. + +This implements [HDF5 filter ID 32008](https://portal.hdfgroup.org/display/support/Filters#Filters-32008) diff --git a/ext/bitshuffle_jll_ext/bitshuffle_jll_ext.jl b/ext/bitshuffle_jll_ext/bitshuffle_jll_ext.jl new file mode 100644 index 000000000..476c896ef --- /dev/null +++ b/ext/bitshuffle_jll_ext/bitshuffle_jll_ext.jl @@ -0,0 +1,388 @@ +#== +Julia code wrapping the bitshuffle filter for HDF5. A rough translation of +bshuf_h5filter.c by Kiyoshi Masui, see +https://github.com/kiyo-masui/bitshuffle. +Originally authored by "James.Hester " as H5Zbitshuffle +==# +""" +The bitshuffle filter for HDF5. See https://portal.hdfgroup.org/display/support/Filters#Filters-32008 +and https://github.com/kiyo-masui/bitshuffle for details. 
module bitshuffle_jll_ext

using bitshuffle_jll

using HDF5.API
import HDF5.Filters:
    Filter,
    filterid,
    register_filter,
    filtername,
    filter_func,
    filter_cfunc,
    set_local_func,
    set_local_cfunc

export BSHUF_H5_COMPRESS_LZ4,
    BSHUF_H5_COMPRESS_ZSTD, BitshuffleFilter, H5Z_filter_bitshuffle

# From bshuf_h5filter.h

const BSHUF_H5_COMPRESS_LZ4 = 2
const BSHUF_H5_COMPRESS_ZSTD = 3
const H5Z_FILTER_BITSHUFFLE = API.H5Z_filter_t(32008)

const BSHUF_VERSION_MAJOR = 0
const BSHUF_VERSION_MINOR = 4
const BSHUF_VERSION_POINT = 2

const bitshuffle_name = "HDF5 bitshuffle filter; see https://github.com/kiyo-masui/bitshuffle"

# Size in bytes of the header written in front of compressed chunks:
# 8 bytes uncompressed length + 4 bytes block size (both big endian).
const BSHUF_HEADER_BYTES = 12

# Set filter arguments

"""
    bitshuffle_set_local(dcpl, htype, space)

HDF5 "set local" callback for the bitshuffle filter.

Reads the filter's current client data from `dcpl`, fills in the bitshuffle
version (slots 1 and 2) and the element size of `htype` (slot 3), validates the
user-supplied block size (slot 4, must be a multiple of 8) and compression code
(slot 5), and writes the values back with `h5p_modify_filter`.

Returns `API.herr_t(1)` on success and `API.herr_t(-1)` if validation fails.
"""
function bitshuffle_set_local(dcpl::API.hid_t, htype::API.hid_t, space::API.hid_t)

    # Sanity check of provided values and set element size

    bs_flags = Ref{Cuint}()
    bs_values = Vector{Cuint}(undef, 8)
    bs_nelements = Ref{Csize_t}(length(bs_values))

    API.h5p_get_filter_by_id(
        dcpl, H5Z_FILTER_BITSHUFFLE, bs_flags, bs_nelements, bs_values, 0, C_NULL, C_NULL
    )

    @debug "Initial filter info" bs_flags bs_values bs_nelements

    # set values

    bs_values[1] = BSHUF_VERSION_MAJOR
    bs_values[2] = BSHUF_VERSION_MINOR

    elem_size = API.h5t_get_size(htype)

    @debug "Element size for $htype reported as $elem_size"

    if elem_size <= 0
        return API.herr_t(-1)
    end

    bs_values[3] = elem_size
    nelements = bs_nelements[]

    # check user-supplied values

    # Block size must be a multiple of 8 (values are unsigned, so no sign check).
    if nelements > 3 && bs_values[4] % 8 != 0
        return API.herr_t(-1)
    end

    if nelements > 4 &&
        !(bs_values[5] in (0, BSHUF_H5_COMPRESS_LZ4, BSHUF_H5_COMPRESS_ZSTD))
        return API.herr_t(-1)
    end

    @debug "Final values" bs_values

    API.h5p_modify_filter(dcpl, H5Z_FILTER_BITSHUFFLE, bs_flags[], nelements, bs_values)

    return API.herr_t(1)
end

"""
    H5Z_filter_bitshuffle(flags, cd_nelmts, cd_values, nbytes, buf_size, buf)

HDF5 filter callback for bitshuffle.

`cd_values` holds, in order: major version, minor version, element size, and
optionally block size, compression code and compression level. Only the slots
covered by `cd_nelmts` are read; missing optional slots default to zero.

With a compression code of `BSHUF_H5_COMPRESS_LZ4` or `BSHUF_H5_COMPRESS_ZSTD`
the chunk is shuffled and compressed (or decompressed and unshuffled when
`API.H5Z_FLAG_REVERSE` is set in `flags`), using a 12-byte header in front of
the payload. Otherwise the data is only bit(un)shuffled.

On success the input buffer is freed, `buf`/`buf_size` are pointed at the new
buffer and the number of output bytes is returned. On failure `0` is returned
and the pointer arguments are left unchanged.
"""
function H5Z_filter_bitshuffle(
    flags::Cuint,
    cd_nelmts::Csize_t,
    cd_values::Ptr{Cuint},
    nbytes::Csize_t,
    buf_size::Ptr{Csize_t},
    buf::Ptr{Ptr{Cvoid}}
)::Csize_t
    in_buf = unsafe_load(buf) #in_buf is *void
    out_buf = C_NULL
    nbytes_out = Csize_t(0)

    try #mop up errors at end
        @debug "nelmts" cd_nelmts

        if cd_nelmts < 3
            error("bitshuffle_h5plugin: Not enough elements provided to bitshuffle filter")
        end

        # Get needed information

        major = unsafe_load(cd_values, 1)
        minor = unsafe_load(cd_values, 2)
        elem_size = unsafe_load(cd_values, 3)
        # Slots 4-6 are optional; never read past what the caller supplied.
        block_size = cd_nelmts > 3 ? Csize_t(unsafe_load(cd_values, 4)) : Csize_t(0)
        compress_flag = cd_nelmts > 4 ? unsafe_load(cd_values, 5) : Cuint(0)
        comp_lvl = cd_nelmts > 5 ? unsafe_load(cd_values, 6) : Cuint(0)

        @debug "Major,minor:" major minor
        @debug "element size, compress_level, compress_flag" elem_size comp_lvl compress_flag

        if elem_size == 0
            error("bitshuffle_h5plugin: Element size must be positive")
        end

        if block_size == 0
            block_size = ccall(
                (:bshuf_default_block_size, libbitshuffle), Csize_t, (Csize_t,), elem_size
            )
        end

        decoding = (flags & API.H5Z_FLAG_REVERSE) != 0
        use_codec =
            cd_nelmts > 4 &&
            (compress_flag in (BSHUF_H5_COMPRESS_LZ4, BSHUF_H5_COMPRESS_ZSTD))

        # Work out buffer sizes

        if use_codec && decoding # unshuffle and decompress
            if nbytes < BSHUF_HEADER_BYTES
                error("bitshuffle_h5plugin: Chunk of $nbytes bytes is too short for header")
            end

            # First 8 bytes is number of uncompressed bytes
            nbytes_uncomp = ccall(
                (:bshuf_read_uint64_BE, libbitshuffle), UInt64, (Ptr{Cvoid},), in_buf
            )
            # Next 4 bytes are the block size
            block_size =
                ccall(
                    (:bshuf_read_uint32_BE, libbitshuffle),
                    UInt32,
                    (Ptr{Cvoid},),
                    in_buf + 8
                ) ÷ elem_size

            in_buf += BSHUF_HEADER_BYTES
            buf_size_out = nbytes_uncomp
        elseif use_codec # shuffle and compress
            nbytes_uncomp = nbytes
            if compress_flag == BSHUF_H5_COMPRESS_LZ4
                bound = ccall(
                    (:bshuf_compress_lz4_bound, libbitshuffle),
                    Csize_t,
                    (Csize_t, Csize_t, Csize_t),
                    nbytes_uncomp ÷ elem_size,
                    elem_size,
                    block_size
                )
            else
                bound = ccall(
                    (:bshuf_compress_zstd_bound, libbitshuffle),
                    Csize_t,
                    (Csize_t, Csize_t, Csize_t),
                    nbytes_uncomp ÷ elem_size,
                    elem_size,
                    block_size
                )
            end
            buf_size_out = bound + BSHUF_HEADER_BYTES
        else # No compression required
            nbytes_uncomp = nbytes
            buf_size_out = nbytes
        end

        if nbytes_uncomp % elem_size != 0
            error(
                "bitshuffle_h5plugin: Uncompressed size $nbytes_uncomp is not a multiple of $elem_size"
            )
        end

        nelem = nbytes_uncomp ÷ elem_size
        buf_size_out <= 0 && error(
            "bitshuffle_h5plugin: Non-positive buf_size_out for malloc: $buf_size_out"
        )
        out_buf = Libc.malloc(buf_size_out)
        if out_buf == C_NULL
            error("bitshuffle_h5plugin: Cannot allocate memory for outbuf")
        end

        # Now perform the (de)compression / (un)shuffle

        if use_codec && decoding
            if compress_flag == BSHUF_H5_COMPRESS_LZ4
                err = ccall(
                    (:bshuf_decompress_lz4, libbitshuffle),
                    Int64,
                    (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t),
                    in_buf,
                    out_buf,
                    nelem,
                    elem_size,
                    block_size
                )
            else
                err = ccall(
                    (:bshuf_decompress_zstd, libbitshuffle),
                    Int64,
                    (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t),
                    in_buf,
                    out_buf,
                    nelem,
                    elem_size,
                    block_size
                )
            end
            nbytes_out = nbytes_uncomp
        elseif use_codec
            # Header: uncompressed byte count, then block size in bytes.
            ccall(
                (:bshuf_write_uint64_BE, libbitshuffle),
                Cvoid,
                (Ptr{Cvoid}, UInt64),
                out_buf,
                nbytes_uncomp
            )
            ccall(
                (:bshuf_write_uint32_BE, libbitshuffle),
                Cvoid,
                (Ptr{Cvoid}, UInt32),
                out_buf + 8,
                block_size * elem_size
            )

            if compress_flag == BSHUF_H5_COMPRESS_LZ4
                err = ccall(
                    (:bshuf_compress_lz4, libbitshuffle),
                    Int64,
                    (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t),
                    in_buf,
                    out_buf + BSHUF_HEADER_BYTES,
                    nelem,
                    elem_size,
                    block_size
                )
            else
                # The zstd entry point takes the compression level as a
                # trailing int argument.
                err = ccall(
                    (:bshuf_compress_zstd, libbitshuffle),
                    Int64,
                    (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t, Cint),
                    in_buf,
                    out_buf + BSHUF_HEADER_BYTES,
                    nelem,
                    elem_size,
                    block_size,
                    comp_lvl
                )
            end

            nbytes_out = err + BSHUF_HEADER_BYTES
        else # just the shuffle thanks
            if decoding
                err = ccall(
                    (:bshuf_bitunshuffle, libbitshuffle),
                    Int64,
                    (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t),
                    in_buf,
                    out_buf,
                    nelem,
                    elem_size,
                    block_size
                )
            else
                err = ccall(
                    (:bshuf_bitshuffle, libbitshuffle),
                    Int64,
                    (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t),
                    in_buf,
                    out_buf,
                    nelem,
                    elem_size,
                    block_size
                )
            end

            nbytes_out = nbytes
        end

        # And wrap it up

        if err < 0
            error("h5plugin_bitshuffle: Error in bitshuffle with code $err")
        end

        Libc.free(unsafe_load(buf))
        unsafe_store!(buf, out_buf)
        unsafe_store!(buf_size, Csize_t(buf_size_out))
        out_buf = C_NULL

    catch e

        # On failure, return 0 and change no arguments

        nbytes_out = Csize_t(0)
        # Capture the backtrace in this task, then log asynchronously so the
        # callback itself does not task switch (same approach as the Zstd filter).
        bt = catch_backtrace()
        @async @error "Non-fatal H5 bitshuffle plugin error: " exception = (e, bt)

    finally
        # Still non-NULL only when ownership was not transferred to HDF5.
        if out_buf != C_NULL
            Libc.free(out_buf)
        end
    end

    return Csize_t(nbytes_out)
end

# Filter registration

# All information for the filter; the field order is the cd_values layout.

struct BitshuffleFilter <: Filter
    major::Cuint
    minor::Cuint
    typesize::Cuint
    blocksize::Cuint
    compression::Cuint
    comp_level::Cuint #Zstd only
end

"""
    BitshuffleFilter(blocksize=0,compressor=:none,comp_level=0)

The Bitshuffle filter can optionally include compression :lz4 or :zstd. For :zstd
comp_level can be provided. This is ignored for :lz4 compression. If `blocksize`
is zero the default bitshuffle blocksize is used.
"""
function BitshuffleFilter(; blocksize=0, compressor=:none, comp_level=0)
    compressor in (:lz4, :zstd, :none) ||
        throw(ArgumentError("Invalid bitshuffle compression $compressor"))
    compcode = if compressor == :lz4
        BSHUF_H5_COMPRESS_LZ4
    elseif compressor == :zstd
        BSHUF_H5_COMPRESS_ZSTD
    else
        0
    end
    # typesize (third field) is filled in later by bitshuffle_set_local.
    return BitshuffleFilter(
        BSHUF_VERSION_MAJOR, BSHUF_VERSION_MINOR, 0, blocksize, compcode, comp_level
    )
end

filterid(::Type{BitshuffleFilter}) = H5Z_FILTER_BITSHUFFLE
filtername(::Type{BitshuffleFilter}) = bitshuffle_name
set_local_func(::Type{BitshuffleFilter}) = bitshuffle_set_local
set_local_cfunc(::Type{BitshuffleFilter}) =
    @cfunction(bitshuffle_set_local, API.herr_t, (API.hid_t, API.hid_t, API.hid_t))
# Extends the imported HDF5.Filters.filter_func.
filter_func(::Type{BitshuffleFilter}) = H5Z_filter_bitshuffle
filter_cfunc(::Type{BitshuffleFilter}) = @cfunction(
    H5Z_filter_bitshuffle,
    Csize_t,
    (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}})
)

function __init__()
    register_filter(BitshuffleFilter)
end

end # module bitshuffle_jll_ext
A rough translation of -bshuf_h5filter.c by Kiyoshi Masui, see -https://github.com/kiyo-masui/bitshuffle. +Most of the code has been moved into ==# """ The bitshuffle filter for HDF5. See https://portal.hdfgroup.org/display/support/Filters#Filters-32008 @@ -11,6 +9,8 @@ module H5Zbitshuffle using bitshuffle_jll +using HDF5: HDF5 +# The next block of lines can be removed using HDF5.API import HDF5.Filters: Filter, @@ -25,363 +25,21 @@ import HDF5.Filters: export BSHUF_H5_COMPRESS_LZ4, BSHUF_H5_COMPRESS_ZSTD, BitshuffleFilter, H5Z_filter_bitshuffle -# From bshuf_h5filter.h +const bitshuffle_jll_ext = Base.get_extension(HDF5, :bitshuffle_jll_ext) -const BSHUF_H5_COMPRESS_LZ4 = 2 -const BSHUF_H5_COMPRESS_ZSTD = 3 -const H5Z_FILTER_BITSHUFFLE = API.H5Z_filter_t(32008) +using .bitshuffle_jll_ext: BSHUF_H5_COMPRESS_LZ4 +using .bitshuffle_jll_ext: BSHUF_H5_COMPRESS_ZSTD +using .bitshuffle_jll_ext: BitshuffleFilter +using .bitshuffle_jll_ext: H5Z_filter_bitshuffle -const BSHUF_VERSION_MAJOR = 0 -const BSHUF_VERSION_MINOR = 4 -const BSHUF_VERSION_POINT = 2 +using .bitshuffle_jll_ext: BSHUF_H5_COMPRESS_LZ4 +using .bitshuffle_jll_ext: BSHUF_H5_COMPRESS_ZSTD +using .bitshuffle_jll_ext: H5Z_FILTER_BITSHUFFLE -const bitshuffle_name = "HDF5 bitshuffle filter; see https://github.com/kiyo-masui/bitshuffle" +using .bitshuffle_jll_ext: BSHUF_VERSION_MAJOR +using .bitshuffle_jll_ext: BSHUF_VERSION_MINOR +using .bitshuffle_jll_ext: BSHUF_VERSION_POINT -# Set filter arguments - -function bitshuffle_set_local(dcpl::API.hid_t, htype::API.hid_t, space::API.hid_t) - - # Sanity check of provided values and set element size - - bs_flags = Ref{Cuint}() - bs_values = Vector{Cuint}(undef, 8) - bs_nelements = Ref{Csize_t}(length(bs_values)) - - API.h5p_get_filter_by_id( - dcpl, H5Z_FILTER_BITSHUFFLE, bs_flags, bs_nelements, bs_values, 0, C_NULL, C_NULL - ) - - @debug "Initial filter info" bs_flags bs_values bs_nelements - - flags = bs_flags[] - - # set values - - bs_values[1] = BSHUF_VERSION_MAJOR 
- bs_values[2] = BSHUF_VERSION_MINOR - - elem_size = API.h5t_get_size(htype) - - @debug "Element size for $htype reported as $elem_size" - - if elem_size <= 0 - return API.herr_t(-1) - end - - bs_values[3] = elem_size - nelements = bs_nelements[] - - # check user-supplied values - - if nelements > 3 - if bs_values[4] % 8 != 0 || bs_values[4] < 0 - return API.herr_t(-1) - end - end - - if nelements > 4 - if !(bs_values[5] in (0, BSHUF_H5_COMPRESS_LZ4, BSHUF_H5_COMPRESS_ZSTD)) - return API.herr_t(-1) - end - end - - @debug "Final values" bs_values - - API.h5p_modify_filter(dcpl, H5Z_FILTER_BITSHUFFLE, bs_flags[], nelements, bs_values) - - return API.herr_t(1) -end - -function H5Z_filter_bitshuffle( - flags::Cuint, - cd_nelmts::Csize_t, - cd_values::Ptr{Cuint}, - nbytes::Csize_t, - buf_size::Ptr{Csize_t}, - buf::Ptr{Ptr{Cvoid}} -)::Csize_t - in_buf = unsafe_load(buf) #in_buf is *void - out_buf = C_NULL - nbytes_out = 0 - block_size = 0 - - try #mop up errors at end - @debug "nelmts" cd_nelmts - - if cd_nelmts < 3 - error("bitshuffle_h5plugin: Not enough elements provided to bitshuffle filter") - end - - # Get needed information - - major = unsafe_load(cd_values, 1) - minor = unsafe_load(cd_values, 2) - elem_size = unsafe_load(cd_values, 3) - comp_lvl = unsafe_load(cd_values, 6) - compress_flag = unsafe_load(cd_values, 5) - - if cd_nelmts > 3 - block_size = unsafe_load(cd_values, 4) - end - - @debug "Major,minor:" major minor - @debug "element size, compress_level, compress_flag" elem_size comp_lvl compress_flag - - if block_size == 0 - block_size = ccall( - (:bshuf_default_block_size, libbitshuffle), Csize_t, (Csize_t,), elem_size - ) - end - - # Work out buffer sizes - - if cd_nelmts > 4 && - (compress_flag in (BSHUF_H5_COMPRESS_LZ4, BSHUF_H5_COMPRESS_ZSTD)) - - # Use compression - - if (flags & API.H5Z_FLAG_REVERSE) != 0 # unshuffle and decompress - - # First 8 bytes is number of uncompressed bytes - nbytes_uncomp = ccall( - (:bshuf_read_uint64_BE, libbitshuffle), 
UInt64, (Ptr{Cvoid},), in_buf - ) - # Next 4 bytes are the block size - - block_size = - ccall( - (:bshuf_read_uint32_BE, libbitshuffle), - UInt32, - (Ptr{Cvoid},), - in_buf + 8 - ) ÷ elem_size - - in_buf += 12 - buf_size_out = nbytes_uncomp - - else #shuffle and compress - nbytes_uncomp = nbytes - if compress_flag == BSHUF_H5_COMPRESS_LZ4 - buf_size_out = - ccall( - (:bshuf_compress_lz4_bound, libbitshuffle), - Csize_t, - (Csize_t, Csize_t, Csize_t), - nbytes_uncomp ÷ elem_size, - elem_size, - block_size - ) + 12 - elseif compress_flag == BSHUF_H5_COMPRESS_ZSTD - buf_size_out = - ccall( - (:bshuf_compress_zstd_bound, libbitshuffle), - Csize_t, - (Csize_t, Csize_t, Csize_t), - nbytes_uncomp ÷ elem_size, - elem_size, - block_size - ) + 12 - end - end - - else # No compression required - nbytes_uncomp = nbytes - buf_size_out = nbytes - end - - if nbytes_uncomp % elem_size != 0 - error( - "bitshuffle_h5plugin: Uncompressed size $nbytes_uncomp is not a multiple of $elem_size" - ) - end - - size = nbytes_uncomp ÷ elem_size - buf_size_out <= 0 && error( - "bitshuffle_h5plugin: Non-positive buf_size_out for malloc: $buf_size_out" - ) - out_buf = Libc.malloc(buf_size_out) - if out_buf == C_NULL - error( - "bitshuffle_h5plugin: Cannot allocate memory for outbuf during decompression" - ) - end - - # Now perform the decompression - - if cd_nelmts > 4 && - (compress_flag in (BSHUF_H5_COMPRESS_LZ4, BSHUF_H5_COMPRESS_ZSTD)) - if flags & API.H5Z_FLAG_REVERSE != 0 #unshuffle and decompress - if compress_flag == BSHUF_H5_COMPRESS_LZ4 - err = ccall( - (:bshuf_decompress_lz4, libbitshuffle), - Int64, - (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t), - in_buf, - out_buf, - size, - elem_size, - block_size - ) - elseif compress_flag == BSHUF_H5_COMPRESS_ZSTD - err = ccall( - (:bshuf_decompress_zstd, libbitshuffle), - Int64, - (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t), - in_buf, - out_buf, - size, - elem_size, - block_size - ) - end - nbytes_out = nbytes_uncomp - - else 
#shuffle and compress - ccall( - (:bshuf_write_uint64_BE, libbitshuffle), - Cvoid, - (Ptr{Cvoid}, UInt64), - out_buf, - nbytes_uncomp - ) - ccall( - (:bshuf_write_uint32_BE, libbitshuffle), - Cvoid, - (Ptr{Cvoid}, UInt32), - out_buf + 8, - block_size * elem_size - ) - - if compress_flag == BSHUF_H5_COMPRESS_LZ4 - err = ccall( - (:bshuf_compress_lz4, libbitshuffle), - Int64, - (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t), - in_buf, - out_buf + 12, - size, - elem_size, - block_size - ) - else - err = ccall( - (:bshuf_compress_zstd, libbitshuffle), - Int64, - (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t), - in_buf, - out_buf + 12, - size, - elem_size, - block_size - ) - end - - nbytes_out = err + 12 - end - else # just the shuffle thanks - if flags & API.H5Z_FLAG_REVERSE != 0 - err = ccall( - (:bshuf_bitunshuffle, libbitshuffle), - Int64, - (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t), - in_buf, - out_buf, - size, - elem_size, - block_size - ) - else - err = ccall( - (:bshuf_bitshuffle, libbitshuffle), - Int64, - (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t), - in_buf, - out_buf, - size, - elem_size, - block_size - ) - end - - nbytes_out = nbytes - end - - # And wrap it up - - if err < 0 - error("h5plugin_bitshuffle: Error in bitshuffle with code $err") - end - - Libc.free(unsafe_load(buf)) - unsafe_store!(buf, out_buf) - unsafe_store!(buf_size, Csize_t(buf_size_out)) - out_buf = C_NULL - - catch e - - # On failure, return 0 and change no arguments - - nbytes_out = Csize_t(0) - @error "Non-fatal H5 bitshuffle plugin error: " e - display(stacktrace(catch_backtrace())) - - finally - if out_buf != C_NULL - Libc.free(out_buf) - end - end - - return Csize_t(nbytes_out) -end - -# Filter registration - -# All information for the filter - -struct BitshuffleFilter <: Filter - major::Cuint - minor::Cuint - typesize::Cuint - blocksize::Cuint - compression::Cuint - comp_level::Cuint #Zstd only -end - -""" - 
BitshuffleFilter(blocksize=0,compressor=:none,comp_level=0) - -The Bitshuffle filter can optionally include compression :lz4 or :zstd. For :zstd -comp_level can be provided. This is ignored for :lz4 compression. If `blocksize` -is zero the default bitshuffle blocksize is used. -""" -function BitshuffleFilter(; blocksize=0, compressor=:none, comp_level=0) - compressor in (:lz4, :zstd, :none) || - throw(ArgumentError("Invalid bitshuffle compression $compressor")) - compcode = 0 - if compressor == :lz4 - compcode = BSHUF_H5_COMPRESS_LZ4 - elseif compressor == :zstd - compcode = BSHUF_H5_COMPRESS_ZSTD - end - BitshuffleFilter( - BSHUF_VERSION_MAJOR, BSHUF_VERSION_MINOR, 0, blocksize, compcode, comp_level - ) -end - -filterid(::Type{BitshuffleFilter}) = H5Z_FILTER_BITSHUFFLE -filtername(::Type{BitshuffleFilter}) = bitshuffle_name -set_local_func(::Type{BitshuffleFilter}) = bitshuffle_set_local -set_local_cfunc(::Type{BitshuffleFilter}) = - @cfunction(bitshuffle_set_local, API.herr_t, (API.hid_t, API.hid_t, API.hid_t)) -filterfunc(::Type{BitshuffleFilter}) = H5Z_filter_bitshuffle -filter_cfunc(::Type{BitshuffleFilter}) = @cfunction( - H5Z_filter_bitshuffle, - Csize_t, - (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}}) -) - -function __init__() - register_filter(BitshuffleFilter) -end +using .bitshuffle_jll_ext: bitshuffle_name end # module diff --git a/filters/H5Zblosc/Project.toml b/filters/H5Zblosc/Project.toml index f41373845..e885229d7 100644 --- a/filters/H5Zblosc/Project.toml +++ b/filters/H5Zblosc/Project.toml @@ -1,6 +1,6 @@ name = "H5Zblosc" uuid = "c8ec2601-a99c-407f-b158-e79c03c2f5f7" -version = "0.1.2" +version = "0.2.0" [deps] Blosc = "a74b3585-a348-5f62-a45c-50e91977d574" @@ -9,4 +9,4 @@ HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" [compat] HDF5 = "0.17" Blosc = "0.7.3" -julia = "1.3" +julia = "1.9" diff --git a/filters/H5Zblosc/src/H5Zblosc.jl b/filters/H5Zblosc/src/H5Zblosc.jl index 2806e322b..7cc86f3e3 100644 --- 
a/filters/H5Zblosc/src/H5Zblosc.jl +++ b/filters/H5Zblosc/src/H5Zblosc.jl @@ -2,6 +2,9 @@ module H5Zblosc # port of https://github.com/Blosc/c-blosc/blob/3a668dcc9f61ad22b5c0a0ab45fe8dad387277fd/hdf5/blosc_filter.c (copyright 2010 Francesc Alted, license: MIT/expat) import Blosc +using HDF5: HDF5 + +# The next block of lines can be removed using HDF5.API import HDF5.Filters: Filter, FilterPipeline import HDF5.Filters: @@ -19,205 +22,13 @@ export H5Z_FILTER_BLOSC, blosc_filter, BloscFilter # Import Blosc shuffle constants import Blosc: NOSHUFFLE, SHUFFLE, BITSHUFFLE -const H5Z_FILTER_BLOSC = API.H5Z_filter_t(32001) # Filter ID registered with the HDF Group for Blosc -const FILTER_BLOSC_VERSION = 2 -const blosc_name = "blosc" - -function blosc_set_local(dcpl::API.hid_t, htype::API.hid_t, space::API.hid_t) - blosc_flags = Ref{Cuint}() - blosc_values = Vector{Cuint}(undef, 8) - blosc_nelements = Ref{Csize_t}(length(blosc_values)) - blosc_chunkdims = Vector{API.hsize_t}(undef, 32) - - API.h5p_get_filter_by_id( - dcpl, - H5Z_FILTER_BLOSC, - blosc_flags, - blosc_nelements, - blosc_values, - 0, - C_NULL, - C_NULL - ) - flags = blosc_flags[] - - nelements = max(blosc_nelements[], 4) # First 4 slots reserved - - # Set Blosc info in first two slots - blosc_values[1] = FILTER_BLOSC_VERSION - blosc_values[2] = Blosc.VERSION_FORMAT - - ndims = API.h5p_get_chunk(dcpl, 32, blosc_chunkdims) - chunksize = prod(resize!(blosc_chunkdims, ndims)) - if ndims < 0 || ndims > 32 || chunksize > Blosc.MAX_BUFFERSIZE - return API.herr_t(-1) - end - - htypesize = API.h5t_get_size(htype) - if API.h5t_get_class(htype) == API.H5T_ARRAY - hsuper = API.h5t_get_super(htype) - basetypesize = API.h5t_get_size(hsuper) - API.h5t_close(hsuper) - else - basetypesize = htypesize - end - - # Limit large typesizes (they are pretty inefficient to shuffle - # and, in addition, Blosc does not handle typesizes larger than - # blocksizes). 
- if basetypesize > Blosc.MAX_TYPESIZE - basetypesize = 1 - end - blosc_values[3] = basetypesize - blosc_values[4] = chunksize * htypesize # size of the chunk - - API.h5p_modify_filter(dcpl, H5Z_FILTER_BLOSC, flags, nelements, blosc_values) - - return API.herr_t(1) -end - -function blosc_filter( - flags::Cuint, - cd_nelmts::Csize_t, - cd_values::Ptr{Cuint}, - nbytes::Csize_t, - buf_size::Ptr{Csize_t}, - buf::Ptr{Ptr{Cvoid}} -) - typesize = unsafe_load(cd_values, 3) # The datatype size - outbuf_size = unsafe_load(cd_values, 4) - # Compression level: - clevel = cd_nelmts >= 5 ? unsafe_load(cd_values, 5) : Cuint(5) - # Do shuffle: - doshuffle = cd_nelmts >= 6 ? unsafe_load(cd_values, 6) : SHUFFLE - - if (flags & API.H5Z_FLAG_REVERSE) == 0 # compressing - # Allocate an output buffer exactly as long as the input data; if - # the result is larger, we simply return 0. The filter is flagged - # as optional, so HDF5 marks the chunk as uncompressed and proceeds. - outbuf_size = unsafe_load(buf_size) - outbuf_size <= 0 && return Csize_t(0) - outbuf = Libc.malloc(outbuf_size) - outbuf == C_NULL && return Csize_t(0) - - compname = if cd_nelmts >= 7 - compcode = unsafe_load(cd_values, 7) - Blosc.compname(compcode) - else - "blosclz" - end - Blosc.set_compressor(compname) - status = Blosc.blosc_compress( - clevel, doshuffle, typesize, nbytes, unsafe_load(buf), outbuf, nbytes - ) - status < 0 && (Libc.free(outbuf); return Csize_t(0)) - else # decompressing - # Extract the exact outbuf_size from the buffer header. - # - # NOTE: the guess value got from "cd_values" corresponds to the - # uncompressed chunk size but it should not be used in a general - # cases since other filters in the pipeline can modify the buffer - # size. 
- in = unsafe_load(buf) - # See https://github.com/JuliaLang/julia/issues/43402 - # Resolved in https://github.com/JuliaLang/julia/pull/43408 - outbuf_size, cbytes, blocksize = Blosc.cbuffer_sizes(in) - outbuf_size <= 0 && return Csize_t(0) - outbuf = Libc.malloc(outbuf_size) - outbuf == C_NULL && return Csize_t(0) - status = Blosc.blosc_decompress(in, outbuf, outbuf_size) - status <= 0 && (Libc.free(outbuf); return Csize_t(0)) - end - - if status != 0 - Libc.free(unsafe_load(buf)) - unsafe_store!(buf, outbuf) - unsafe_store!(buf_size, outbuf_size) - return Csize_t(status) # size of compressed/decompressed data - end - Libc.free(outbuf) - return Csize_t(0) -end - -""" - BloscFilter(;level=5, shuffle=true, compressor="blosclz") - -The Blosc compression filter, using [Blosc.jl](https://github.com/JuliaIO/Blosc.jl). Options: - - - `level`: compression level - - `shuffle`: whether to shuffle data before compressing (this option should be used instead of the [`Shuffle`](@ref) filter) - - `compressor`: the compression algorithm. Call `Blosc.compressors()` for the available compressors. 
- -# External links -* [What Is Blosc?](https://www.blosc.org/pages/blosc-in-depth/) -* [Blosc HDF5 Filter ID 32001](https://portal.hdfgroup.org/display/support/Filters#Filters-32001) -* [Blosc HDF5 Plugin Repository (C code)](https://github.com/Blosc/hdf5-blosc) -""" -struct BloscFilter <: Filter - blosc_version::Cuint - version_format::Cuint - typesize::Cuint - bufsize::Cuint - level::Cuint - shuffle::Cuint - compcode::Cuint -end - -function BloscFilter(; level=5, shuffle=SHUFFLE, compressor="blosclz") - Blosc.isvalidshuffle(shuffle) || throw(ArgumentError("invalid blosc shuffle $shuffle")) - compcode = Blosc.compcode(compressor) - BloscFilter(0, 0, 0, 0, level, shuffle, compcode) -end - -filterid(::Type{BloscFilter}) = H5Z_FILTER_BLOSC -filtername(::Type{BloscFilter}) = blosc_name -set_local_func(::Type{BloscFilter}) = blosc_set_local -set_local_cfunc(::Type{BloscFilter}) = - @cfunction(blosc_set_local, API.herr_t, (API.hid_t, API.hid_t, API.hid_t)) -filter_func(::Type{BloscFilter}) = blosc_filter -filter_cfunc(::Type{BloscFilter}) = @cfunction( - blosc_filter, - Csize_t, - (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}}) -) - -function Base.show(io::IO, blosc::BloscFilter) - print( - io, - BloscFilter, - "(level=", - Int(blosc.level), - ",shuffle=", - blosc.shuffle == NOSHUFFLE ? "NOSHUFFLE" : - blosc.shuffle == SHUFFLE ? "SHUFFLE" : - blosc.shuffle == BITSHUFFLE ? 
"BITSHUFFLE" : - "UNKNOWN", - ",compressor=", - Blosc.compname(blosc.compcode), - ")" - ) -end +const BloscExt = Base.get_extension(HDF5, :BloscExt) -function Base.push!(f::FilterPipeline, blosc::BloscFilter) - 0 <= blosc.level <= 9 || - throw(ArgumentError("blosc compression $(blosc.level) not in [0,9]")) - Blosc.isvalidshuffle(blosc.shuffle) || - throw(ArgumentError("invalid blosc shuffle $(blosc.shuffle)")) - ref = Ref(blosc) - GC.@preserve ref begin - API.h5p_set_filter( - f.plist, - filterid(BloscFilter), - API.H5Z_FLAG_OPTIONAL, - div(sizeof(BloscFilter), sizeof(Cuint)), - pointer_from_objref(ref) - ) - end - return f -end +using .BloscExt: blosc_filter +using .BloscExt: BloscFilter -function __init__() - register_filter(BloscFilter) -end +using .BloscExt: H5Z_FILTER_BLOSC +using .BloscExt: FILTER_BLOSC_VERSION +using .BloscExt: blosc_name end # module H5Zblosc diff --git a/filters/H5Zbzip2/Project.toml b/filters/H5Zbzip2/Project.toml index 547e7a2cb..197c78c63 100644 --- a/filters/H5Zbzip2/Project.toml +++ b/filters/H5Zbzip2/Project.toml @@ -1,6 +1,6 @@ name = "H5Zbzip2" uuid = "094576f2-1e46-4c84-8e32-c46c042eaaa2" -version = "0.1.2" +version = "0.2.0" [deps] CodecBzip2 = "523fee87-0ab8-5b00-afb7-3ecf72e48cfd" @@ -9,4 +9,4 @@ HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" [compat] HDF5 = "0.17" CodecBzip2 = "0.7, 0.8" -julia = "1.3" +julia = "1.9" diff --git a/filters/H5Zbzip2/src/H5Zbzip2.jl b/filters/H5Zbzip2/src/H5Zbzip2.jl index ec8c12c4f..b6c898127 100644 --- a/filters/H5Zbzip2/src/H5Zbzip2.jl +++ b/filters/H5Zbzip2/src/H5Zbzip2.jl @@ -1,239 +1,23 @@ #= -The code below has been ported to Julia from the original C source: -https://github.com/nexusformat/HDF5-External-Filter-Plugins/blob/master/BZIP2/src/H5Zbzip2.c -The filter function H5Z_filter_bzip2 was adopted from: -PyTables http://www.pytables.org. -The plugin can be used with the HDF5 library version 1.8.11+ to read HDF5 datasets compressed with bzip2 created by PyTables. 
-License: licenses/H5Zbzip2_LICENSE.txt - -The following license applies to the Julia port. Copyright (c) 2021 Mark Kittisopikul and Howard Hughes Medical Institute. License MIT, see LICENSE.txt =# module H5Zbzip2 -using CodecBzip2 -import CodecBzip2: libbzip2 +using CodecBzip2: libbzip2 +using HDF5: HDF5 + +# Remove the next three lines in the future using HDF5.API import HDF5.Filters: Filter, filterid, register_filter, filtername, filter_func, filter_cfunc export H5Z_FILTER_BZIP2, H5Z_filter_bzip2, Bzip2Filter -const H5Z_FILTER_BZIP2 = API.H5Z_filter_t(307) -const bzip2_name = "HDF5 bzip2 filter; see http://www.hdfgroup.org/services/contributions.html" - -function H5Z_filter_bzip2( - flags::Cuint, - cd_nelmts::Csize_t, - cd_values::Ptr{Cuint}, - nbytes::Csize_t, - buf_size::Ptr{Csize_t}, - buf::Ptr{Ptr{Cvoid}} -)::Csize_t - outbuf = C_NULL - outdatalen = Cuint(0) - - # Prepare the output buffer - - try - if flags & API.H5Z_FLAG_REVERSE != 0 - # Decompress - - outbuflen = nbytes * 3 + 1 - outbuflen <= 0 && - error("H5Zbzip2: Non-positive outbuflen for malloc: $outbuflen.") - outbuf = Libc.malloc(outbuflen) - if outbuf == C_NULL - error("H5Zbzip2: memory allocation failed for bzip2 decompression.") - end - - stream = CodecBzip2.BZStream() - # Just use default malloc and free - stream.bzalloc = C_NULL - stream.bzfree = C_NULL - # BZ2_bzDecompressInit - ret = CodecBzip2.decompress_init!(stream, 0, false) - if ret != CodecBzip2.BZ_OK - errror("H5Zbzip2: bzip2 decompress start failed with error $ret.") - end - - stream.next_out = outbuf - stream.avail_out = outbuflen - stream.next_in = unsafe_load(buf) - stream.avail_in = nbytes - - cont = true - - while cont - # BZ2_bzDecompress - ret = CodecBzip2.decompress!(stream) - if ret < 0 - error("H5Zbzip2: bzip2 decompression failed with error $ret.") - end - cont = ret != CodecBzip2.BZ_STREAM_END - if cont && stream.avail_out == 0 - # Grow the output buffer - newbuflen = outbuflen * 2 - newbuf = Libc.realloc(outbuf, 
newbuflen) - if newbuf == C_NULL - error("H5Zbzip2: memory allocation failed for bzip2 decompression.") - end - stream.next_out = newbuf + outbuflen - stream.avail_out = outbuflen - outbuf = newbuf - outbuflen = newbuflen - end - end - - outdatalen = stream.total_out_lo32 - # BZ2_bzDecompressEnd - ret = CodecBzip2.decompress_end!(stream) - if ret != CodecBzip2.BZ_OK - error("H5Zbzip2: bzip2 compression end failed with error $ret.") - end - else - # Compress data - - # Maybe not the same size as outdatalen - odatalen = Cuint(0) - blockSize100k = 9 - - # Get compression blocksize if present - if cd_nelmts > 0 - blockSize100k = unsafe_load(cd_values) - if blockSize100k < 1 || blockSize100k > 9 - error("H5Zbzip2: Invalid compression blocksize: $blockSize100k") - end - end - - # Prepare the output buffer - outbuflen = nbytes + nbytes ÷ 100 + 600 # worse case (bzip2 docs) - outbuflen <= 0 && - error("H5Zbzip2: Non-positive outbuflen for malloc: $outbuflen.") - outbuf = Libc.malloc(outbuflen) - @debug "Allocated" outbuflen outbuf - if outbuf == C_NULL - error("H5Zbzip2: Memory allocation failed for bzip2 compression") - end - - # Compress data - odatalen = outbuflen - r_odatalen = Ref{Cuint}(odatalen) - ret = BZ2_bzBuffToBuffCompress( - outbuf, r_odatalen, unsafe_load(buf), nbytes, blockSize100k, 0, 0 - ) - outdatalen = r_odatalen[] - if ret != CodecBzip2.BZ_OK - error("H5Zbzip2: bzip2 compression failed with error $ret.") - end - end # if flags & API.H5Z_FLAG_REVERSE != 0 - Libc.free(unsafe_load(buf)) - unsafe_store!(buf, outbuf) - unsafe_store!(buf_size, outbuflen) - - catch err - # "In the case of failure, the return value is 0 (zero) and all pointer arguments are left unchanged." - outdatalen = Csize_t(0) - if outbuf != C_NULL - Libc.free(outbuf) - end - @error "H5Zbzip2.jl Non-Fatal ERROR: " err - display(stacktrace(catch_backtrace())) - end # try - catch - - return Csize_t(outdatalen) -end # function H5Z_filter_bzip2 - -# Need stdcall for 32-bit Windows? 
-function BZ2_bzBuffToBuffCompress( - dest, destLen, source, sourceLen, blockSize100k, verbosity, workFactor -) - @static if CodecBzip2.WIN32 - return ccall( - ("BZ2_bzBuffToBuffCompress@28", libbzip2), - stdcall, - Cint, - (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint, Cint), - dest, - destLen, - source, - sourceLen, - blockSize100k, - verbosity, - workFactor - ) - else - return ccall( - (:BZ2_bzBuffToBuffCompress, libbzip2), - Cint, - (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint, Cint), - dest, - destLen, - source, - sourceLen, - blockSize100k, - verbosity, - workFactor - ) - end -end - -function BZ2_bzBuffToBuffDecompress(dest, destLen, source, sourceLen, small, verbosity) - @static if CodecBzip2.WIN32 - return ccall( - ("BZ2_bzBuffToBuffDecompress@24", libbzip2), - stdcall, - Cint, - (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint), - dest, - destLen, - source, - sourceLen, - small, - verbosity - ) - else - return ccall( - (:BZ2_bzBuffToBuffDecompress, libbzip2), - Cint, - (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint), - dest, - destLen, - source, - sourceLen, - small, - verbosity - ) - end -end - -# Filters Module - -""" - Bzip2Filter(blockSize100k) - -Apply Bzip2 compression. The filter id is $H5Z_FILTER_BZIP2. 
- -# External Links -* [BZIP2 HDF5 Filter ID 307](https://portal.hdfgroup.org/display/support/Filters#Filters-307) -* [PyTables Repository (C code)](https://github.com/PyTables/PyTables) -""" -struct Bzip2Filter <: Filter - blockSize100k::Cuint -end -Bzip2Filter() = Bzip2Filter(9) - -filterid(::Type{Bzip2Filter}) = H5Z_FILTER_BZIP2 -filtername(::Type{Bzip2Filter}) = bzip2_name -filter_func(::Type{Bzip2Filter}) = H5Z_filter_bzip2 -filter_cfunc(::Type{Bzip2Filter}) = @cfunction( - H5Z_filter_bzip2, - Csize_t, - (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}}) -) +const CodecBzip2Ext = Base.get_extension(HDF5, :CodecBzip2Ext) -function __init__() - register_filter(Bzip2Filter) -end +using .CodecBzip2Ext: H5Z_FILTER_BZIP2 +using .CodecBzip2Ext: H5Z_filter_bzip2 +using .CodecBzip2Ext: Bzip2Filter +using .CodecBzip2Ext: bzip2_name end # module H5Zbzip2 diff --git a/filters/H5Zlz4/Project.toml b/filters/H5Zlz4/Project.toml index 3ee255cf6..e8cea78a0 100644 --- a/filters/H5Zlz4/Project.toml +++ b/filters/H5Zlz4/Project.toml @@ -1,6 +1,6 @@ name = "H5Zlz4" uuid = "eb20ec05-5464-47b5-ba41-098e3c1068a3" -version = "0.1.1" +version = "0.2.0" [deps] CodecLz4 = "5ba52731-8f18-5e0d-9241-30f10d1ec561" @@ -9,4 +9,4 @@ HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" [compat] HDF5 = "0.17" CodecLz4 = "0.4" -julia = "1.3" +julia = "1.9" diff --git a/filters/H5Zlz4/src/H5Zlz4.jl b/filters/H5Zlz4/src/H5Zlz4.jl index b726666d1..158a2726a 100644 --- a/filters/H5Zlz4/src/H5Zlz4.jl +++ b/filters/H5Zlz4/src/H5Zlz4.jl @@ -1,236 +1,28 @@ #= -This is a port of H5Zlz4.c to Julia -https://github.com/HDFGroup/hdf5_plugins/blob/master/LZ4/src/H5Zlz4.c -https://github.com/nexusformat/HDF5-External-Filter-Plugins/blob/master/LZ4/src/H5Zlz4.c -https://github.com/silx-kit/hdf5plugin/blob/main/src/LZ4/H5Zlz4.c - -H5Zlz4 is originally a copyright of HDF Group. License: licenses/H5Zlz4_LICENSE.txt - -The following license applies to the Julia port. 
Copyright (c) 2021 Mark Kittisopikul and Howard Hughes Medical Institute. License MIT, see LICENSE.txt =# module H5Zlz4 using CodecLz4 +using HDF5: HDF5 + +# The next three lines can be removed using HDF5.API import HDF5.Filters: Filter, filterid, register_filter, filtername, filter_func, filter_cfunc export H5Z_FILTER_LZ4, H5Z_filter_lz4, Lz4Filter -const H5Z_FILTER_LZ4 = API.H5Z_filter_t(32004) - -const DEFAULT_BLOCK_SIZE = 1 << 30 -const lz4_name = "HDF5 lz4 filter; see http://www.hdfgroup.org/services/contributions.html" - -const LZ4_AGGRESSION = Ref(1) - -# flags H5Z_FLAG_REVERSE or H5Z_FLAG_OPTIONAL -# cd_nelmts number of elements in cd_values (0 or 1) -# cd_values the first optional element must be the blockSize -# nbytes - number of valid bytes of data -# buf_size - total size of buffer -# buf - pointer to pointer of data -function H5Z_filter_lz4( - flags::Cuint, - cd_nelmts::Csize_t, - cd_values::Ptr{Cuint}, - nbytes::Csize_t, - buf_size::Ptr{Csize_t}, - buf::Ptr{Ptr{Cvoid}} -)::Csize_t - outBuf = C_NULL - ret_value = Csize_t(0) - - try - if (flags & API.H5Z_FLAG_REVERSE) != 0 # reverse filter, decompressing - #i32Buf = Ref{UInt32}() - blockSize = UInt32(0) - roBuf = Ref{UInt8}() - rpos = Ptr{UInt8}(unsafe_load(buf)) - #i64Buf = Ptr{UInt64}(rpos) - # Load the first 8 bytes from buffer as a big endian UInt64 - # This is the original size of the buffer - origSize = ntoh(unsafe_load(Ptr{UInt64}(rpos))) - rpos += 8 # advance the pointer - - # Next read the next four bytes from the buffer as a big endian UInt32 - # This is the blocksize - #i32Buf[] = rpos - blockSize = ntoh(unsafe_load(Ptr{UInt32}(rpos))) - rpos += 4 - if blockSize > origSize - blockSize = origSize - end - - # malloc a byte buffer of origSize - # outBuf = Vector{UInt8}(undef, origSize) - @debug "OrigSize" origSize - origSize <= 0 && error("H5Zlz4: Non-positive origSize for malloc: $origSize") - outBuf = Libc.malloc(origSize) - outBuf == C_NULL && error("H5Zlz4: Could not allocate memory via 
malloc") - # Julia should throw an error if it cannot allocate this - roBuf = Ptr{UInt8}(outBuf) - decompSize = 0 - # Start with the first blockSize - while decompSize < origSize - # compressedBlockSize = UInt32(0) - if origSize - decompSize < blockSize # the last block can be smaller than block size - blockSize = origSize - decompSize - end - - #i32Buf[] = rpos - compressedBlockSize = ntoh(unsafe_load(Ptr{UInt32}(rpos))) - rpos += 4 - - if compressedBlockSize == blockSize - # There was no compression - # memcpy(roBuf, rpos, blockSize) - unsafe_copyto!(roBuf, rpos, blockSize) - decompressedBytes = blockSize - else - # do the compression - # LZ4_decompress_fast, version number 10300 ? - @debug "decompress_safe" rpos roBuf compressedBlockSize ( - origSize - decompSize - ) - decompressedBytes = CodecLz4.LZ4_decompress_safe( - rpos, roBuf, compressedBlockSize, origSize - decompSize - ) - @debug "decompressedBytes" decompressedBytes - end - - rpos += compressedBlockSize - roBuf += blockSize - decompSize += decompressedBytes - end - Libc.free(unsafe_load(buf)) - unsafe_store!(buf, outBuf) - outBuf = C_NULL - ret_value = Csize_t(origSize) - else - # forward filter - # compressing - #i64Buf = Ref{UInt64}() - #i32Buf = Ref{UInt32}() - - if nbytes > typemax(Int32) - error("Can only compress chunks up to 2GB") - end - blockSize = unsafe_load(cd_values) - if cd_nelmts > 0 && blockSize > 0 - else - blockSize = DEFAULT_BLOCK_SIZE - end - if blockSize > nbytes - blockSize = nbytes - end - nBlocks = (nbytes - 1) ÷ blockSize + 1 - maxDestSize = - nBlocks * CodecLz4.LZ4_compressBound(blockSize) + 4 + 8 + nBlocks * 4 - maxDestSize <= 0 && - error("H5Zlz4: Non-positive maxDestSize for malloc: $maxDestSize") - outBuf = Libc.malloc(maxDestSize) - outBuf == C_NULL && error("H5Zlz4: Could not allocate memory via malloc") - - rpos = Ptr{UInt8}(unsafe_load(buf)) - roBuf = Ptr{UInt8}(outBuf) - - # Header - unsafe_store!(Ptr{UInt64}(roBuf), hton(UInt64(nbytes))) - roBuf += 8 +const 
CodecLz4Ext = Base.get_extension(HDF5, :CodecLz4Ext) - unsafe_store!(Ptr{UInt32}(roBuf), hton(UInt32(blockSize))) - roBuf += 4 +using .CodecLz4Ext: H5Z_filter_lz4 +using .CodecLz4Ext: Lz4Filter - outSize = 12 +using .CodecLz4Ext: H5Z_FILTER_LZ4 - for block in 0:(nBlocks - 1) - # compBlockSize::UInt32 - origWritten = Csize_t(block * blockSize) - if nbytes - origWritten < blockSize # the last block may be < blockSize - blockSize = nbytes - origWritten - end +using .CodecLz4Ext: DEFAULT_BLOCK_SIZE +using .CodecLz4Ext: lz4_name - # aggression = 1 is the same LZ4_compress_default - @debug "LZ4_compress_fast args" rpos outBuf roBuf roBuf + 4 blockSize nBlocks CodecLz4.LZ4_compressBound( - blockSize - ) - compBlockSize = UInt32( - CodecLz4.LZ4_compress_fast( - rpos, - roBuf + 4, - blockSize, - CodecLz4.LZ4_compressBound(blockSize), - LZ4_AGGRESSION[] - ) - ) - @debug "Compressed block size" compBlockSize - - if compBlockSize == 0 - error("Could not compress block $block") - end - - if compBlockSize >= blockSize # compression did not save any space, do a memcpy instead - compBlockSize = blockSize - unsafe_copyto!(roBuf + 4, rpos, blockSize) - end - - unsafe_store!(Ptr{UInt32}(roBuf), hton(UInt32(compBlockSize))) # write blocksize - roBuf += 4 - - rpos += blockSize - roBuf += compBlockSize - outSize += compBlockSize + 4 - end - - Libc.free(unsafe_load(buf)) - unsafe_store!(buf, outBuf) - unsafe_store!(buf_size, outSize) - outBuf = C_NULL - ret_value = Csize_t(outSize) - end # (flags & API.H5Z_FLAG_REVERSE) != 0 - - catch err - # "In the case of failure, the return value is 0 (zero) and all pointer arguments are left unchanged." - ret_value = Csize_t(0) - @async @error "H5Zlz4.jl Non-Fatal ERROR: " err - display(stacktrace(catch_backtrace())) - finally - if outBuf != C_NULL - Libc.free(outBuf) - end - end - return Csize_t(ret_value) -end - -# Filters Module - -""" - Lz4Filter(blockSize) - -Apply LZ4 compression. `blockSize` is the main argument. 
The filter id is $H5Z_FILTER_LZ4. - -# External Links -* [LZ4 HDF5 Filter ID 32004](https://portal.hdfgroup.org/display/support/Filters#Filters-32004) -* [LZ4 HDF5 Plugin Repository (C code)](https://github.com/nexusformat/HDF5-External-Filter-Plugins/tree/master/LZ4) -""" -struct Lz4Filter <: Filter - blockSize::Cuint -end -Lz4Filter() = Lz4Filter(DEFAULT_BLOCK_SIZE) - -filterid(::Type{Lz4Filter}) = H5Z_FILTER_LZ4 -filtername(::Type{Lz4Filter}) = lz4_name -filter_func(::Type{Lz4Filter}) = H5Z_filter_lz4 -filter_cfunc(::Type{Lz4Filter}) = @cfunction( - H5Z_filter_lz4, - Csize_t, - (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}}) -) - -function __init__() - register_filter(Lz4Filter) -end +using .CodecLz4Ext: LZ4_AGGRESSION end diff --git a/filters/H5Zzstd/Project.toml b/filters/H5Zzstd/Project.toml index 2f4c1256c..08156d09b 100644 --- a/filters/H5Zzstd/Project.toml +++ b/filters/H5Zzstd/Project.toml @@ -1,6 +1,6 @@ name = "H5Zzstd" uuid = "f6f2d980-1ec6-471c-a70d-0270e22f1103" -version = "0.1.2" +version = "0.2.0" [deps] CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2" @@ -9,4 +9,4 @@ HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" [compat] HDF5 = "0.17" CodecZstd = "0.7, 0.8" -julia = "1.3" +julia = "1.9" diff --git a/filters/H5Zzstd/README.md b/filters/H5Zzstd/README.md index 1c6dff122..b6b4ff0f9 100644 --- a/filters/H5Zzstd/README.md +++ b/filters/H5Zzstd/README.md @@ -3,4 +3,11 @@ Implements the Zstd filter for [HDF5.jl](https://github.com/JuliaIO/HDF5.jl) in Julia. 
See the [documentation](https://juliaio.github.io/HDF5.jl/stable/filters/#H5Zzstd.jl) -This implements [HDF5 ZStandard Filter 32015](https://portal.hdfgroup.org/display/support/Filters#Filters-32015) \ No newline at end of file +This implements [HDF5 ZStandard Filter 32015](https://portal.hdfgroup.org/display/support/Filters#Filters-32015) + +This is a transitional package as the contents of this package are now +implemented by `CodecZstdExt`, an extension package to HDF5 that loads +when CodecZstd.jl is loaded. + +Loading this package will trigger loading of the extension since this +package loads both HDF5.jl and CodecZstd.jl. diff --git a/filters/H5Zzstd/src/H5Zzstd.jl b/filters/H5Zzstd/src/H5Zzstd.jl index e0ea75499..dded8c896 100644 --- a/filters/H5Zzstd/src/H5Zzstd.jl +++ b/filters/H5Zzstd/src/H5Zzstd.jl @@ -1,128 +1,24 @@ -#= -Derived from https://github.com/aparamon/HDF5Plugin-Zstandard, zstd_h5plugin.c -Licensed under Apache License Version 2.0, see licenses/H5Zzstd_LICENSE.txt - -The following license applies to the Julia port. -Copyright (c) 2021 Mark Kittisopikul and Howard Hughes Medical Institute. 
License MIT, see LICENSE.txt -=# -module H5Zzstd - -using CodecZstd -import CodecZstd.LibZstd -using HDF5.API -import HDF5.Filters: - Filter, filterid, register_filter, filterid, filtername, filter_func, filter_cfunc - -const H5Z_FILTER_ZSTD = API.H5Z_filter_t(32015) -const zstd_name = "Zstandard compression: http://www.zstd.net" - -export H5Z_filter_zstd, H5Z_FILTER_ZSTD, ZstdFilter - -# cd_values First optional value is the compressor aggression -# Default is CodecZstd.LibZstd.ZSTD_CLEVEL_DEFAULT -function H5Z_filter_zstd( - flags::Cuint, - cd_nelmts::Csize_t, - cd_values::Ptr{Cuint}, - nbytes::Csize_t, - buf_size::Ptr{Csize_t}, - buf::Ptr{Ptr{Cvoid}} -)::Csize_t - inbuf = unsafe_load(buf) - outbuf = C_NULL - origSize = nbytes - ret_value = Csize_t(0) - - try - if flags & API.H5Z_FLAG_REVERSE != 0 - #decompresssion - - decompSize = LibZstd.ZSTD_getDecompressedSize(inbuf, origSize) - if decompSize == 0 - error("zstd_h5plugin: Cannot retrieve decompressed chunk size") - end - outbuf = Libc.malloc(decompSize) - if outbuf == C_NULL - error( - "zstd_h5plugin: Cannot allocate memory for outbuf during decompression." - ) - end - decompSize = LibZstd.ZSTD_decompress(outbuf, decompSize, inbuf, origSize) - Libc.free(inbuf) - unsafe_store!(buf, outbuf) - outbuf = C_NULL - ret_value = Csize_t(decompSize) - else - # compression - - if cd_nelmts > 0 - aggression = Cint(unsafe_load(cd_values)) - else - aggression = CodecZstd.LibZstd.ZSTD_CLEVEL_DEFAULT - end - - if aggression < 1 - aggression = 1 # ZSTD_minCLevel() - elseif aggression > LibZstd.ZSTD_maxCLevel() - aggression = LibZstd.ZSTD_maxCLevel() - end - - compSize = LibZstd.ZSTD_compressBound(origSize) - outbuf = Libc.malloc(compSize) - if outbuf == C_NULL - error( - "zstd_h5plugin: Cannot allocate memory for outbuf during compression." 
- ) - end - - compSize = LibZstd.ZSTD_compress(outbuf, compSize, inbuf, origSize, aggression) - - Libc.free(unsafe_load(buf)) - unsafe_store!(buf, outbuf) - unsafe_store!(buf_size, compSize) - outbuf = C_NULL - ret_value = compSize - end - catch e - # "In the case of failure, the return value is 0 (zero) and all pointer arguments are left unchanged." - ret_value = Csize_t(0) - # Output Julia error via async so we do not task switch during callback - @async @error "H5Zzstd Non-Fatal ERROR: " exception = (e, catch_backtrace()) - finally - if outbuf != C_NULL - Libc.free(outbuf) - end - end # try catch finally - return Csize_t(ret_value) -end +""" + H5Zzstd -# Filters Module +Transitional package to HDF5/CodecZstdExt. +The contents of this package are now contained within the package extension +CodecZstdExt. Loading this package will load the package extension. """ - ZstdFilter(clevel) +module H5Zzstd -Zstandard compression filter. `clevel` determines the compression level. +using HDF5: HDF5 +using CodecZstd: CodecZstd +const CodecZstdExt = Base.get_extension(HDF5, :CodecZstdExt) -# External Links -* [Zstandard HDF5 Filter ID 32015](https://portal.hdfgroup.org/display/support/Filters#Filters-32015) -* [Zstandard HDF5 Plugin Repository (C code)](https://github.com/aparamon/HDF5Plugin-Zstandard) -""" -struct ZstdFilter <: Filter - clevel::Cuint -end -ZstdFilter() = ZstdFilter(CodecZstd.LibZstd.ZSTD_CLEVEL_DEFAULT) +using .CodecZstdExt: H5Z_FILTER_ZSTD +using .CodecZstdExt: zstd_name -filterid(::Type{ZstdFilter}) = H5Z_FILTER_ZSTD -filtername(::Type{ZstdFilter}) = zstd_name -filter_func(::Type{ZstdFilter}) = H5Z_filter_zstd -filter_cfunc(::Type{ZstdFilter}) = @cfunction( - H5Z_filter_zstd, - Csize_t, - (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}}) -) +using .CodecZstdExt: H5Z_filter_zstd +using .CodecZstdExt: H5Z_FILTER_ZSTD +using .CodecZstdExt: ZstdFilter -function __init__() - register_filter(ZstdFilter) -end +export H5Z_filter_zstd, 
H5Z_FILTER_ZSTD, ZstdFilter end # module H5Zzstd diff --git a/test/runtests.jl b/test/runtests.jl index de9021459..268f0da51 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -16,14 +16,18 @@ filter_path = joinpath(dirname(pathof(HDF5)), "..", "filters") if !Base.BinaryPlatforms.CPUID.test_cpu_feature(Base.BinaryPlatforms.CPUID.JL_X86_avx2) Pkg.add(PackageSpec(; name="Blosc_jll", version=v"1.21.2+0")) end -Pkg.develop([ - PackageSpec(; path=joinpath(filter_path, "H5Zblosc")), - PackageSpec(; path=joinpath(filter_path, "H5Zbzip2")), - PackageSpec(; path=joinpath(filter_path, "H5Zlz4")), - PackageSpec(; path=joinpath(filter_path, "H5Zzstd")), -]) -@static if VERSION >= v"1.6" - Pkg.develop(PackageSpec(; path=joinpath(filter_path, "H5Zbitshuffle"))) +@static if VERSION >= v"1.9" + Pkg.develop([ + PackageSpec(; path=joinpath(filter_path, "H5Zblosc")), + PackageSpec(; path=joinpath(filter_path, "H5Zbzip2")), + PackageSpec(; path=joinpath(filter_path, "H5Zlz4")), + PackageSpec(; path=joinpath(filter_path, "H5Zzstd")), + PackageSpec(; path=joinpath(filter_path, "H5Zbitshuffle")), + ]) +elseif VERSION >= v"1.6" + Pkg.add(["H5Zblosc", "H5Zbzip2", "H5Zlz4", "H5Zzstd", "H5Zbitshuffle"]) +else + Pkg.add(["H5Zblosc", "H5Zbzip2", "H5Zlz4", "H5Zzstd"]) end @info "libhdf5 v$(HDF5.API.h5_get_libversion())"