From c04eecfc1b0519b2726407b528e40fae794acef1 Mon Sep 17 00:00:00 2001 From: guilhermebodin Date: Fri, 13 Sep 2024 10:37:16 -0300 Subject: [PATCH 1/3] add file_to_array and array_to_file --- src/binary.jl | 2 +- src/reader.jl | 40 ++++++++++++++++++++ src/writer.jl | 84 +++++++++++++++++++++++++++++++++++++++++ test/test_read_write.jl | 55 ++++++++++++++++++++++++--- 4 files changed, 175 insertions(+), 6 deletions(-) diff --git a/src/binary.jl b/src/binary.jl index 50b71e1..bbf6dcc 100644 --- a/src/binary.jl +++ b/src/binary.jl @@ -141,7 +141,7 @@ function _quiver_goto!(reader::Quiver.Reader{binary}) end function _quiver_next_dimension!(reader::Quiver.Reader{binary}) - error("Not implemented") + error("`next_dimension!`: not implemented for the `binary` implementation, use `goto!` instead.") return nothing end diff --git a/src/reader.jl b/src/reader.jl index f877660..732157e 100644 --- a/src/reader.jl +++ b/src/reader.jl @@ -96,4 +96,44 @@ end function close!(reader::Reader) _quiver_close!(reader) return nothing +end + +""" + file_to_array( + filename::String, + implementation::DataType; + labels_to_read::Vector{String} = String[], + ) + +Reads a file and returns the data and metadata as a tuple. +""" +function file_to_array( + filename::String, + implementation::DataType; + labels_to_read::Vector{String} = String[], +) + reader = Reader{implementation}( + filename; + labels_to_read, + carrousel = false, # carrousel does not make sense in this implemetations + ) + + metadata = reader.metadata + dimension_names = reverse(metadata.dimensions) + dimension_sizes = reverse(metadata.dimension_size) + data = zeros( + Float32, + length(reader.labels_to_read), + dimensions_sizes..., + ) + + for dims in Iterators.product([1:size for size in dimension_sizes]...) + dim_kwargs = OrderedDict(Symbol.(dimension_names) .=> dims) + Quiver.goto!(reader; dim_kwargs...) + data[:, dims...] 
= reader.data + end + + Quiver.close!(reader) + + return data, metadata end \ No newline at end of file diff --git a/src/writer.jl b/src/writer.jl index 2e79c9d..242c68e 100644 --- a/src/writer.jl +++ b/src/writer.jl @@ -31,4 +31,88 @@ end function close!(writer::Writer) _quiver_close!(writer) return nothing +end + +""" + array_to_file( + filename::String, + data::Array{T, N}, + implementation::DataType; + dimensions::Vector{String}, + labels::Vector{String}, + time_dimension::String, + dimension_size::Vector{Int}, + initial_date::Union{String, DateTime} = "", + unit::String = "", + round_digits::Union{Int, Nothing} = nothing, + ) where {T, N} + +Write a time series file in Quiver format. + +Required arguments: + + - `file_path::String`: Path to file. + - `data::Array{T, N}`: Data to be written. + - `dimensions::Vector{String}`: Dimensions of the data. + - `labels::Vector{String}`: Labels of the data. + - `time_dimension::String`: Name of the time dimension. + - `dimension_size::Vector{Int}`: Size of each dimension. + - `initial_date::Union{String, DateTime}`: Initial date of the time series. If a string is provided, it should be in the format "yyyy-mm-ddTHH:MM:SS". + +Optional arguments: + - `round_digits::Union{Int, Nothing}`: Number of digits to round the data. If nothing is provided, the data is not rounded. + - `unit::String`: Unit of the time series data. 
+""" +function array_to_file( + filename::String, + data::Array{T, N}, + implementation::DataType; + dimensions::Vector{String}, + labels::Vector{String}, + time_dimension::String, + dimension_size::Vector{Int}, + initial_date::Union{String, DateTime} = "", + unit::String = "", + round_digits::Union{Int, Nothing} = nothing, +) where {T, N} + kwargs_dict = Dict{Symbol, Any}() + if initial_date !== "" + if isa(initial_date, String) + initial_date = DateTime(initial_date, "yyyy-mm-ddTHH:MM:SS") + end + kwargs_dict[:initial_date] = initial_date + end + if unit != "" + kwargs_dict[:unit] = unit + else + @warn("No unit was provided for the time series file \"$filename\".") + end + + writer = Quiver.Writer{implementation}( + filename; + dimensions, + labels, + time_dimension, + dimension_size, + kwargs_dict..., + ) + + reverse_dimensions = Symbol.(reverse(dimensions)) + + for dims in Iterators.product([1:size for size in reverse(dimension_size)]...) + dim_kwargs = OrderedDict(reverse_dimensions .=> dims) + Quiver.write!(writer, round_digits(data[:, dims...]); dim_kwargs...) 
+ end + + Quiver.close!(writer) + + return nothing +end + +function round_digits(vec::Vector{T}, ::Nothing) where {T} + return vec +end + +function round_digits(vec::Vector{T}, round_digits::Int) where {T} + return round.(vec, digits = round_digits) end \ No newline at end of file diff --git a/test/test_read_write.jl b/test/test_read_write.jl index 28d42f3..9cb963a 100644 --- a/test/test_read_write.jl +++ b/test/test_read_write.jl @@ -948,6 +948,51 @@ function read_write_out_of_order_kwargs(impl) rm("$filename.toml") end +function read_file_to_array(impl) + filename = joinpath(@__DIR__, "test_read_file_to_array") + + initial_date = DateTime(2006, 1, 1) + num_stages = 10 + dates = collect(initial_date:Dates.Month(1):initial_date + Dates.Month(num_stages - 1)) + num_scenarios = 12 + num_blocks_per_stage = Int32.(Dates.daysinmonth.(dates) .* 24) + num_time_series = 3 + + dimensions = ["stage", "scenario", "block"] + labels = ["agent_$i" for i in 1:num_time_series] + time_dimension = "stage" + dimension_size = [num_stages, num_scenarios, maximum(num_blocks_per_stage)] + + data = zeros(num_time_series, maximum(num_blocks_per_stage), num_scenarios, num_stages) + for stage in 1:num_stages + for scenario in 1:num_scenarios + for block in 1:num_blocks_per_stage[stage] + for i in 1:num_time_series + data[i, block, scenario, stage] = stage + scenario + block + i + end + end + end + end + + Quiver.array_to_file( + filename, + data, + impl, + dimensions, + labels, + time_dimension, + initial_date + ) + + data_read, metadata = Quiver.file_to_array(filename, impl) + + @test size(data) == size(data_read) + + for i in eachindex(data) + @test data[i] == data_read[i] + end +end + function test_read_write_implementations() for impl in Quiver.implementations() @testset "Read and Write $(impl)" begin @@ -956,11 +1001,6 @@ function test_read_write_implementations() read_write_3(impl) read_write_4(impl) read_write_5(impl) - if impl == Quiver.csv - read_write_goto_csv_1() - 
read_write_goto_csv_2() - read_write_goto_csv_3() - end read_write_carrousel(impl) read_outside_bounds_1(impl) read_outside_bounds_2(impl) @@ -968,6 +1008,11 @@ function test_read_write_implementations() read_outside_bounds_4(impl) read_filtering_labels(impl) read_write_out_of_order_kwargs(impl) + if impl == Quiver.csv + read_write_goto_csv_1() + read_write_goto_csv_2() + read_write_goto_csv_3() + end end end end From ca2e8621dc7fd7494892065070c8253ad166ee86 Mon Sep 17 00:00:00 2001 From: guilhermebodin Date: Fri, 13 Sep 2024 10:38:03 -0300 Subject: [PATCH 2/3] bump version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 52167f8..3b17fed 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Quiver" uuid = "cdbb3f72-2527-4dbd-9d0e-93533a5519ac" authors = ["raphasampaio", "guilhermebodin"] -version = "0.1.3" +version = "0.1.4" [deps] CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" From 3e20bfd1efa1f790085cfb9bdf036f7a770d4fa8 Mon Sep 17 00:00:00 2001 From: guilhermebodin Date: Fri, 13 Sep 2024 10:51:05 -0300 Subject: [PATCH 3/3] updates --- src/reader.jl | 12 ++++++------ src/writer.jl | 17 +++++++++-------- test/test_read_write.jl | 14 ++++++++------ 3 files changed, 23 insertions(+), 20 deletions(-) diff --git a/src/reader.jl b/src/reader.jl index 732157e..8a7d9b6 100644 --- a/src/reader.jl +++ b/src/reader.jl @@ -101,18 +101,18 @@ end """ file_to_array( filename::String, - implementation::DataType; + implementation::Type{I}; labels_to_read::Vector{String} = String[], - ) + ) where {I <: Implementation} Reads a file and returns the data and metadata as a tuple. 
""" function file_to_array( filename::String, - implementation::DataType; + implementation::Type{I}; labels_to_read::Vector{String} = String[], -) - reader = Reader{implementation}( +) where {I <: Implementation} + reader = Reader{I}( filename; labels_to_read, carrousel = false, # carrousel does not make sense in this implemetations @@ -124,7 +124,7 @@ function file_to_array( data = zeros( Float32, length(reader.labels_to_read), - dimensions_sizes..., + dimension_sizes..., ) for dims in Iterators.product([1:size for size in dimension_sizes]...) diff --git a/src/writer.jl b/src/writer.jl index 242c68e..a8c1bd3 100644 --- a/src/writer.jl +++ b/src/writer.jl @@ -37,7 +37,7 @@ end array_to_file( filename::String, data::Array{T, N}, - implementation::DataType; + implementation::Type{I}; dimensions::Vector{String}, labels::Vector{String}, time_dimension::String, @@ -45,7 +45,7 @@ end initial_date::Union{String, DateTime} = "", unit::String = "", round_digits::Union{Int, Nothing} = nothing, - ) where {T, N} + ) where {I <:Implementation, T, N} Write a time series file in Quiver format. @@ -53,6 +53,7 @@ Required arguments: - `file_path::String`: Path to file. - `data::Array{T, N}`: Data to be written. + - `implementation::Type{I}`: Implementation to be used. It can be `Quiver.csv` or `Quiver.binary`. - `dimensions::Vector{String}`: Dimensions of the data. - `labels::Vector{String}`: Labels of the data. - `time_dimension::String`: Name of the time dimension. 
@@ -66,15 +67,15 @@ Optional arguments: function array_to_file( filename::String, data::Array{T, N}, - implementation::DataType; + implementation::Type{I}; dimensions::Vector{String}, labels::Vector{String}, time_dimension::String, dimension_size::Vector{Int}, initial_date::Union{String, DateTime} = "", unit::String = "", - round_digits::Union{Int, Nothing} = nothing, -) where {T, N} + digits::Union{Int, Nothing} = nothing, +) where {I <: Implementation, T, N} kwargs_dict = Dict{Symbol, Any}() if initial_date !== "" if isa(initial_date, String) @@ -101,7 +102,7 @@ function array_to_file( for dims in Iterators.product([1:size for size in reverse(dimension_size)]...) dim_kwargs = OrderedDict(reverse_dimensions .=> dims) - Quiver.write!(writer, round_digits(data[:, dims...]); dim_kwargs...) + Quiver.write!(writer, round_digits(data[:, dims...], digits); dim_kwargs...) end Quiver.close!(writer) @@ -113,6 +114,6 @@ function round_digits(vec::Vector{T}, ::Nothing) where {T} return vec end -function round_digits(vec::Vector{T}, round_digits::Int) where {T} - return round.(vec, digits = round_digits) +function round_digits(vec::Vector{T}, digits::Int) where {T} + return round.(vec; digits = digits) end \ No newline at end of file diff --git a/test/test_read_write.jl b/test/test_read_write.jl index 9cb963a..6bed0d5 100644 --- a/test/test_read_write.jl +++ b/test/test_read_write.jl @@ -952,9 +952,9 @@ function read_file_to_array(impl) filename = joinpath(@__DIR__, "test_read_file_to_array") initial_date = DateTime(2006, 1, 1) - num_stages = 10 + num_stages = 4 dates = collect(initial_date:Dates.Month(1):initial_date + Dates.Month(num_stages - 1)) - num_scenarios = 12 + num_scenarios = 3 num_blocks_per_stage = Int32.(Dates.daysinmonth.(dates) .* 24) num_time_series = 3 @@ -977,10 +977,11 @@ function read_file_to_array(impl) Quiver.array_to_file( filename, data, - impl, - dimensions, - labels, - time_dimension, + impl; + dimensions, + labels, + time_dimension, + dimension_size, 
initial_date ) @@ -1008,6 +1009,7 @@ function test_read_write_implementations() read_outside_bounds_4(impl) read_filtering_labels(impl) read_write_out_of_order_kwargs(impl) + read_file_to_array(impl) if impl == Quiver.csv read_write_goto_csv_1() read_write_goto_csv_2()