Merge pull request #20 from psrenergy/gb/file-to-array

add file_to_array and array_to_file
psrenergy · Sep 13, 2024 · 0e81bfd · 0e81bfd · guilhermebodin · Sep 13, 2024
2 parents 250aee1 + 3e20bfd
commit 0e81bfd
Show file tree

Hide file tree

Showing 5 changed files with 179 additions and 7 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "Quiver"
 uuid = "cdbb3f72-2527-4dbd-9d0e-93533a5519ac"
 authors = ["raphasampaio", "guilhermebodin"]
-version = "0.1.3"
+version = "0.1.4"
 
 [deps]
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"

diff --git a/src/binary.jl b/src/binary.jl
@@ -141,7 +141,7 @@ function _quiver_goto!(reader::Quiver.Reader{binary})
 end
 
 function _quiver_next_dimension!(reader::Quiver.Reader{binary})
-    error("Not implemented")
+    error("`next_dimension!`: not implemented for the `binary` implementaiton, use `goto` instead.")
     return nothing
 end
 

diff --git a/src/reader.jl b/src/reader.jl
@@ -96,4 +96,44 @@ end
 function close!(reader::Reader)
     _quiver_close!(reader)
     return nothing
+end
+
+"""
+    file_to_array(
+        filename::String,
+        implementation::Type{I};
+        labels_to_read::Vector{String} = String[],
+    ) where {I <: Implementation}
+
+Read a whole file into memory and return the tuple `(data, metadata)`.
+
+`data` is a `Float32` array with one row per label held in the reader's
+`labels_to_read`; its remaining axes are the file dimensions in reverse order, so its
+size is `(number_of_labels, reverse(metadata.dimension_size)...)`. `metadata` is the
+metadata object exposed by the reader.
+
+The reader is opened with `carrousel = false` and is always closed before this function
+returns, including when reading throws.
+"""
+function file_to_array(
+    filename::String,
+    implementation::Type{I};
+    labels_to_read::Vector{String} = String[],
+) where {I <: Implementation}
+    reader = Reader{I}(
+        filename;
+        labels_to_read,
+        carrousel = false, # carrousel does not make sense in this implementation
+    )
+
+    try
+        metadata = reader.metadata
+        # Reverse the dimensions so the last file dimension becomes the first array axis
+        # after the label axis.
+        dimension_names = Symbol.(reverse(metadata.dimensions))
+        dimension_sizes = reverse(metadata.dimension_size)
+        data = zeros(Float32, length(reader.labels_to_read), dimension_sizes...)
+
+        for dims in Iterators.product((1:n for n in dimension_sizes)...)
+            dim_kwargs = OrderedDict(dimension_names .=> dims)
+            Quiver.goto!(reader; dim_kwargs...)
+            data[:, dims...] = reader.data
+        end
+
+        return data, metadata
+    finally
+        # Close the reader even when `goto!` or the copy above throws.
+        Quiver.close!(reader)
+    end
 end
diff --git a/src/writer.jl b/src/writer.jl
@@ -31,4 +31,89 @@ end
 function close!(writer::Writer)
     _quiver_close!(writer)
     return nothing
+end
+
+"""
+    array_to_file(
+        filename::String,
+        data::Array{T, N},
+        implementation::Type{I};
+        dimensions::Vector{String},
+        labels::Vector{String},
+        time_dimension::String,
+        dimension_size::Vector{Int},
+        initial_date::Union{String, DateTime} = "",
+        unit::String = "",
+        digits::Union{Int, Nothing} = nothing,
+    ) where {I <: Implementation, T, N}
+
+Write a time series file in Quiver format.
+
+Required arguments:
+
+  - `filename::String`: Path to file.
+  - `data::Array{T, N}`: Data to be written. Its size must be
+    `(length(labels), reverse(dimension_size)...)`; otherwise a `DimensionMismatch` is
+    thrown before any file is created.
+  - `implementation::Type{I}`: Implementation to be used. It can be `Quiver.csv` or `Quiver.binary`.
+  - `dimensions::Vector{String}`: Dimensions of the data.
+  - `labels::Vector{String}`: Labels of the data.
+  - `time_dimension::String`: Name of the time dimension.
+  - `dimension_size::Vector{Int}`: Size of each dimension.
+
+Optional arguments:
+
+  - `initial_date::Union{String, DateTime}`: Initial date of the time series. If a string is provided, it should be in the format "yyyy-mm-ddTHH:MM:SS". An empty string (the default) means no initial date is passed to the writer.
+  - `unit::String`: Unit of the time series data. A warning is logged when it is empty.
+  - `digits::Union{Int, Nothing}`: Number of digits to round the data. If nothing is provided, the data is not rounded.
+"""
+function array_to_file(
+    filename::String,
+    data::Array{T, N},
+    implementation::Type{I};
+    dimensions::Vector{String},
+    labels::Vector{String},
+    time_dimension::String,
+    dimension_size::Vector{Int},
+    initial_date::Union{String, DateTime} = "",
+    unit::String = "",
+    digits::Union{Int, Nothing} = nothing,
+) where {I <: Implementation, T, N}
+    # Validate the shape up front so a mismatch cannot leave a half-written file behind.
+    expected_size = (length(labels), reverse(dimension_size)...)
+    if size(data) != expected_size
+        throw(
+            DimensionMismatch(
+                "`data` has size $(size(data)) but `labels` and `dimension_size` require $expected_size.",
+            ),
+        )
+    end
+
+    # Only forward the optional keywords that were actually provided.
+    kwargs_dict = Dict{Symbol, Any}()
+    if initial_date isa DateTime
+        kwargs_dict[:initial_date] = initial_date
+    elseif !isempty(initial_date)
+        kwargs_dict[:initial_date] = DateTime(initial_date, "yyyy-mm-ddTHH:MM:SS")
+    end
+    if isempty(unit)
+        @warn("No unit was provided for the time series file \"$filename\".")
+    else
+        kwargs_dict[:unit] = unit
+    end
+
+    writer = Quiver.Writer{implementation}(
+        filename;
+        dimensions,
+        labels,
+        time_dimension,
+        dimension_size,
+        kwargs_dict...,
+    )
+
+    try
+        # `data` stores the dimensions in reverse order after the label axis.
+        reverse_dimensions = Symbol.(reverse(dimensions))
+
+        for dims in Iterators.product((1:n for n in reverse(dimension_size))...)
+            dim_kwargs = OrderedDict(reverse_dimensions .=> dims)
+            Quiver.write!(writer, round_digits(data[:, dims...], digits); dim_kwargs...)
+        end
+    finally
+        # Close the writer even when a write fails.
+        Quiver.close!(writer)
+    end
+
+    return nothing
+end
+
+"""
+    round_digits(vec, digits)
+
+Return `vec` rounded element-wise to `digits` decimal digits. When `digits` is `nothing`
+the input vector itself is returned, unrounded and uncopied.
+"""
+function round_digits(vec::AbstractVector, ::Nothing)
+    return vec
+end
+
+function round_digits(vec::AbstractVector, digits::Integer)
+    # `round` takes the number of digits as a keyword; the positional form is a MethodError.
+    return round.(vec; digits = digits)
 end
diff --git a/test/test_read_write.jl b/test/test_read_write.jl
@@ -948,6 +948,52 @@ function read_write_out_of_order_kwargs(impl)
     rm("$filename.toml")
 end
 
+# Round-trip test: write a dense array with `array_to_file`, read it back with
+# `file_to_array`, check both arrays match, then delete the files that were written.
+function read_file_to_array(impl)
+    filename = joinpath(@__DIR__, "test_read_file_to_array")
+
+    initial_date = DateTime(2006, 1, 1)
+    num_stages = 4
+    dates = collect(initial_date:Dates.Month(1):initial_date + Dates.Month(num_stages - 1))
+    num_scenarios = 3
+    num_blocks_per_stage = Int32.(Dates.daysinmonth.(dates) .* 24)
+    num_time_series = 3
+
+    dimensions = ["stage", "scenario", "block"]
+    labels = ["agent_$i" for i in 1:num_time_series]
+    time_dimension = "stage"
+    dimension_size = [num_stages, num_scenarios, maximum(num_blocks_per_stage)]
+
+    # Array axes are (label, block, scenario, stage): the dimensions in reverse order.
+    # Blocks beyond a stage's own block count stay at zero.
+    data = zeros(num_time_series, maximum(num_blocks_per_stage), num_scenarios, num_stages)
+    for stage in 1:num_stages
+        for scenario in 1:num_scenarios
+            for block in 1:num_blocks_per_stage[stage]
+                for i in 1:num_time_series
+                    data[i, block, scenario, stage] = stage + scenario + block + i
+                end
+            end
+        end
+    end
+
+    Quiver.array_to_file(
+        filename,
+        data,
+        impl;
+        dimensions,
+        labels,
+        time_dimension,
+        dimension_size,
+        initial_date,
+    )
+
+    data_read, _ = Quiver.file_to_array(filename, impl)
+
+    @test size(data) == size(data_read)
+    # One whole-array comparison instead of one `@test` record per element.
+    @test data == data_read
+
+    # Remove every file written for this test, whatever extension the implementation uses.
+    for file in readdir(@__DIR__; join = true)
+        if startswith(basename(file), "test_read_file_to_array.")
+            rm(file; force = true)
+        end
+    end
+end
+
 function test_read_write_implementations()
     for impl in Quiver.implementations()
         @testset "Read and Write $(impl)" begin
@@ -956,18 +1002,19 @@ function test_read_write_implementations()
             read_write_3(impl)
             read_write_4(impl)
             read_write_5(impl)
-            if impl == Quiver.csv
-                read_write_goto_csv_1()
-                read_write_goto_csv_2()
-                read_write_goto_csv_3()
-            end
             read_write_carrousel(impl)
             read_outside_bounds_1(impl)
             read_outside_bounds_2(impl)
             read_outside_bounds_3(impl)
             read_outside_bounds_4(impl)
             read_filtering_labels(impl)
             read_write_out_of_order_kwargs(impl)
+            read_file_to_array(impl)
+            if impl == Quiver.csv
+                read_write_goto_csv_1()
+                read_write_goto_csv_2()
+                read_write_goto_csv_3()
+            end
         end
     end
 end