Skip to content

Commit

Permalink
Merge pull request #20 from psrenergy/gb/file-to-array
Browse files Browse the repository at this point in the history
add file_to_array and array_to_file
  • Loading branch information
guilhermebodin authored Sep 13, 2024
2 parents 250aee1 + 3e20bfd commit 0e81bfd
Show file tree
Hide file tree
Showing 5 changed files with 179 additions and 7 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Quiver"
uuid = "cdbb3f72-2527-4dbd-9d0e-93533a5519ac"
authors = ["raphasampaio", "guilhermebodin"]
version = "0.1.3"
version = "0.1.4"

[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
Expand Down
2 changes: 1 addition & 1 deletion src/binary.jl
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ function _quiver_goto!(reader::Quiver.Reader{binary})
end

# Sequential iteration is not available for the `binary` implementation: this method
# always throws, pointing the caller at `goto!`, which is the supported way to position
# a binary reader.
function _quiver_next_dimension!(reader::Quiver.Reader{binary})
    error("`next_dimension!`: not implemented for the `binary` implementation, use `goto!` instead.")
    return nothing
end

Expand Down
40 changes: 40 additions & 0 deletions src/reader.jl
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,44 @@ end
# Close `reader` by delegating to the implementation-specific `_quiver_close!`.
# The result of the delegate is discarded; this always returns `nothing`.
close!(reader::Reader) = (_quiver_close!(reader); nothing)

"""
    file_to_array(
        filename::String,
        implementation::Type{I};
        labels_to_read::Vector{String} = String[],
    ) where {I <: Implementation}

Read a whole Quiver file into memory and return the tuple `(data, metadata)`.

`data` is a `Float32` array. Its first axis has one entry per label held by the reader
(`reader.labels_to_read`); the remaining axes are the file dimensions in the *reverse*
order of `metadata.dimensions`, each sized by the matching entry of
`metadata.dimension_size`. `metadata` is the reader's metadata object, returned as is.

# Arguments
- `filename::String`: path of the file, forwarded to `Reader{I}`.
- `implementation::Type{I}`: implementation used to read the file.

# Keywords
- `labels_to_read::Vector{String}`: labels forwarded to the reader (defaults to an
  empty vector).

The reader is always closed before returning, including when reading fails.
"""
function file_to_array(
    filename::String,
    implementation::Type{I};
    labels_to_read::Vector{String} = String[],
) where {I <: Implementation}
    reader = Reader{I}(
        filename;
        labels_to_read,
        carrousel = false, # carrousel does not make sense when the whole file is read
    )

    # `try/finally` guarantees the reader is released even if `goto!` or the copy throws.
    try
        metadata = reader.metadata
        dimension_names = Symbol.(reverse(metadata.dimensions))
        dimension_sizes = reverse(metadata.dimension_size)
        data = zeros(
            Float32,
            length(reader.labels_to_read),
            dimension_sizes...,
        )

        # Visit every combination of dimension indices, position the reader on it and
        # copy the values of all labels into the matching slice of `data`.
        for dims in Iterators.product([1:n for n in dimension_sizes]...)
            dim_kwargs = OrderedDict(dimension_names .=> dims)
            Quiver.goto!(reader; dim_kwargs...)
            data[:, dims...] = reader.data
        end

        return data, metadata
    finally
        Quiver.close!(reader)
    end
end
85 changes: 85 additions & 0 deletions src/writer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,89 @@ end
# Close `writer` by delegating to the implementation-specific `_quiver_close!`.
# The result of the delegate is discarded; this always returns `nothing`.
close!(writer::Writer) = (_quiver_close!(writer); nothing)

"""
    array_to_file(
        filename::String,
        data::Array{T, N},
        implementation::Type{I};
        dimensions::Vector{String},
        labels::Vector{String},
        time_dimension::String,
        dimension_size::Vector{Int},
        initial_date::Union{String, DateTime} = "",
        unit::String = "",
        digits::Union{Int, Nothing} = nothing,
    ) where {I <: Implementation, T, N}

Write a time series file in Quiver format.

`data` is indexed as `data[:, dims...]`: the first axis runs over the labels and the
remaining axes follow the *reverse* order of `dimensions` / `dimension_size`.

Required arguments:

  - `filename::String`: Path to file.
  - `data::Array{T, N}`: Data to be written.
  - `implementation::Type{I}`: Implementation to be used. It can be `Quiver.csv` or `Quiver.binary`.
  - `dimensions::Vector{String}`: Dimensions of the data.
  - `labels::Vector{String}`: Labels of the data.
  - `time_dimension::String`: Name of the time dimension.
  - `dimension_size::Vector{Int}`: Size of each dimension.

Optional arguments:

  - `initial_date::Union{String, DateTime}`: Initial date of the time series. If a string is provided, it should be in the format "yyyy-mm-ddTHH:MM:SS". When left as `""` no initial date is passed to the writer.
  - `unit::String`: Unit of the time series data. When left as `""` no unit is passed to the writer and a warning is logged.
  - `digits::Union{Int, Nothing}`: Number of digits to round the data. If nothing is provided, the data is not rounded.

The writer is always closed before returning, including when writing fails.
"""
function array_to_file(
    filename::String,
    data::Array{T, N},
    implementation::Type{I};
    dimensions::Vector{String},
    labels::Vector{String},
    time_dimension::String,
    dimension_size::Vector{Int},
    initial_date::Union{String, DateTime} = "",
    unit::String = "",
    digits::Union{Int, Nothing} = nothing,
) where {I <: Implementation, T, N}
    # Only forward the optional metadata the caller actually provided.
    kwargs_dict = Dict{Symbol, Any}()
    if initial_date != ""
        # Strings are parsed here; a `DateTime` is forwarded untouched.
        kwargs_dict[:initial_date] =
            initial_date isa String ?
            DateTime(initial_date, "yyyy-mm-ddTHH:MM:SS") : initial_date
    end
    if unit != ""
        kwargs_dict[:unit] = unit
    else
        @warn("No unit was provided for the time series file \"$filename\".")
    end

    writer = Quiver.Writer{implementation}(
        filename;
        dimensions,
        labels,
        time_dimension,
        dimension_size,
        kwargs_dict...,
    )

    # `try/finally` guarantees the writer is released even if a write throws.
    try
        reverse_dimensions = Symbol.(reverse(dimensions))

        # `data` stores the dimensions in reverse order, so iterate in that order and
        # pass each index to the writer under its dimension name.
        for dims in Iterators.product([1:n for n in reverse(dimension_size)]...)
            dim_kwargs = OrderedDict(reverse_dimensions .=> dims)
            Quiver.write!(writer, round_digits(data[:, dims...], digits); dim_kwargs...)
        end
    finally
        Quiver.close!(writer)
    end

    return nothing
end

# No rounding requested (`digits` is `nothing`): hand back the very same vector, uncopied.
round_digits(vec::Vector{T}, ::Nothing) where {T} = vec

# Round every element of `vec` to `digits` decimal digits and return a new vector.
# `round` only accepts the number of digits as a keyword argument; passing it
# positionally raises a `MethodError`.
function round_digits(vec::Vector{T}, digits::Int) where {T}
    return round.(vec; digits = digits)
end
57 changes: 52 additions & 5 deletions test/test_read_write.jl
Original file line number Diff line number Diff line change
Expand Up @@ -948,6 +948,52 @@ function read_write_out_of_order_kwargs(impl)
rm("$filename.toml")
end

# Round-trip test: write a 4-stage / 3-scenario / hourly-block data set with
# `Quiver.array_to_file`, read it back with `Quiver.file_to_array`, and check that the
# size and every element match. Files are written inside a temporary directory so the
# test leaves nothing behind in the source tree, even when an assertion or call fails.
function read_file_to_array(impl)
    initial_date = DateTime(2006, 1, 1)
    num_stages = 4
    dates = collect(initial_date:Dates.Month(1):initial_date + Dates.Month(num_stages - 1))
    num_scenarios = 3
    num_blocks_per_stage = Int32.(Dates.daysinmonth.(dates) .* 24)
    num_time_series = 3

    dimensions = ["stage", "scenario", "block"]
    labels = ["agent_$i" for i in 1:num_time_series]
    time_dimension = "stage"
    dimension_size = [num_stages, num_scenarios, maximum(num_blocks_per_stage)]

    # Axes are (label, block, scenario, stage): labels first, then the dimensions in
    # reverse order. Blocks past the number of hours in a stage are left at zero.
    data = zeros(num_time_series, maximum(num_blocks_per_stage), num_scenarios, num_stages)
    for stage in 1:num_stages
        for scenario in 1:num_scenarios
            for block in 1:num_blocks_per_stage[stage]
                for i in 1:num_time_series
                    data[i, block, scenario, stage] = stage + scenario + block + i
                end
            end
        end
    end

    # `mktempdir` with a do-block deletes the directory and its contents on exit.
    mktempdir() do dir
        filename = joinpath(dir, "test_read_file_to_array")

        Quiver.array_to_file(
            filename,
            data,
            impl;
            dimensions,
            labels,
            time_dimension,
            dimension_size,
            initial_date,
        )

        data_read, _ = Quiver.file_to_array(filename, impl)

        @test size(data) == size(data_read)

        for i in eachindex(data)
            @test data[i] == data_read[i]
        end
    end

    return nothing
end

function test_read_write_implementations()
for impl in Quiver.implementations()
@testset "Read and Write $(impl)" begin
Expand All @@ -956,18 +1002,19 @@ function test_read_write_implementations()
read_write_3(impl)
read_write_4(impl)
read_write_5(impl)
if impl == Quiver.csv
read_write_goto_csv_1()
read_write_goto_csv_2()
read_write_goto_csv_3()
end
read_write_carrousel(impl)
read_outside_bounds_1(impl)
read_outside_bounds_2(impl)
read_outside_bounds_3(impl)
read_outside_bounds_4(impl)
read_filtering_labels(impl)
read_write_out_of_order_kwargs(impl)
read_file_to_array(impl)
if impl == Quiver.csv
read_write_goto_csv_1()
read_write_goto_csv_2()
read_write_goto_csv_3()
end
end
end
end
Expand Down

2 comments on commit 0e81bfd

@guilhermebodin
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/115132

Tip: Release Notes

Did you know you can add release notes too? Just add markdown formatted text underneath the comment after the text
"Release notes:" and it will be added to the registry PR, and if TagBot is installed it will also be added to the
release that TagBot creates. i.e.

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.1.4 -m "<description of version>" 0e81bfdbb0d8310d397dc9b8aa7438a525ffe8c0
git push origin v0.1.4

Please sign in to comment.