Skip to content

Commit

Permalink
Add a fast way to transform a Quiver file directly into a DataFrame
Browse files Browse the repository at this point in the history
  • Loading branch information
guilhermebodin committed Sep 23, 2024
1 parent f9112c2 commit 803a232
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 4 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Quiver"
uuid = "cdbb3f72-2527-4dbd-9d0e-93533a5519ac"
authors = ["raphasampaio", "guilhermebodin"]
version = "0.1.5"
version = "0.1.6"

[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
Expand Down
8 changes: 6 additions & 2 deletions src/metadata.jl
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ function Metadata(;
return metadata
end

function to_toml(metadata::Metadata, filename::String)
dict_metadata = OrderedDict(
function to_ordered_dict(metadata::Metadata)
return OrderedDict(
"version" => metadata.version,
"dimensions" => String.(metadata.dimensions),
"dimension_size" => metadata.dimension_size,
Expand All @@ -68,6 +68,10 @@ function to_toml(metadata::Metadata, filename::String)
"unit" => metadata.unit,
"labels" => metadata.labels,
)
end

function to_toml(metadata::Metadata, filename::String)
dict_metadata = to_ordered_dict(metadata)
open(filename, "w") do io
TOML.print(io, dict_metadata)
end
Expand Down
46 changes: 45 additions & 1 deletion src/reader.jl
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ function file_to_array(
reader = Reader{I}(
filename;
labels_to_read,
carrousel = false, # carrousel does not make sense in this implemetations
)

metadata = reader.metadata
Expand All @@ -147,4 +146,49 @@ function file_to_array(
Quiver.close!(reader)

return data, metadata
end

"""
    file_to_df(filename, implementation; labels_to_read = String[])

Read the Quiver file `filename` with `Reader{I}` and return its contents as a
`DataFrame`.

The result has one `Int` column per entry of `metadata.dimensions` (in that
order) followed by one `Float32` column per entry of `reader.labels_to_read`.
Every combination of dimension indices is visited with `Quiver.goto!`; a
combination whose values are all `NaN` produces no row.

Each key/value pair of `to_ordered_dict(metadata)` is attached to the result
through `DataFrames.metadata!`.

# Arguments
- `filename::String`: file to read, forwarded unchanged to `Reader{I}`.
- `implementation::Type{I}`: selects the `Reader{I}` implementation.

# Keywords
- `labels_to_read::Vector{String}`: forwarded to the reader; defaults to an
  empty vector.

# Returns
- The populated `DataFrame`.
"""
function file_to_df(
    filename::String,
    implementation::Type{I};
    labels_to_read::Vector{String} = String[],
) where {I <: Implementation}
    reader = Reader{I}(
        filename;
        labels_to_read,
    )

    df = DataFrame()

    # Close the reader even when reading or DataFrame construction throws.
    try
        metadata = reader.metadata

        # `Iterators.product` varies its first iterator fastest, so reversing
        # makes the last dimension the innermost loop.
        # NOTE(review): presumably this matches the on-disk order -- confirm.
        dimension_names = Symbol.(reverse(metadata.dimensions))
        dimension_ranges = [1:dim_size for dim_size in reverse(metadata.dimension_size)]

        # Add all columns to the DataFrame
        for dim in metadata.dimensions
            DataFrames.insertcols!(df, dim => Int[])
        end
        for label in reader.labels_to_read
            DataFrames.insertcols!(df, label => Float32[])
        end

        for dims in Iterators.product(dimension_ranges...)
            Quiver.goto!(reader; (dimension_names .=> dims)...)
            if all(isnan, reader.data)
                continue
            end
            # `Any[...]` keeps the dimension indices as integers; an untyped
            # literal would promote them to the float element type before they
            # are stored in the `Int` columns.
            push!(df, Any[reverse(dims)..., reader.data...])
        end

        # Add metadata to DataFrame
        for (key, value) in to_ordered_dict(metadata)
            DataFrames.metadata!(df, key, value)
        end
    finally
        Quiver.close!(reader)
    end

    return df
end
51 changes: 51 additions & 0 deletions test/test_read_write.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ module TestWriter

using Test
using Quiver
using DataFrames
using Dates

function read_write_1(impl)
Expand Down Expand Up @@ -1116,6 +1117,55 @@ function read_file_to_array(impl)
end
end

# Write a 3-label, 3-dimension array with `Quiver.array_to_file`, read it back
# with `Quiver.file_to_df`, and check the shape and metadata of the result.
function read_file_to_df(impl)
    filename = joinpath(@__DIR__, "test_read_file_to_df")

    num_stages = 4
    num_scenarios = 3
    num_time_series = 3

    # One stage per month; the number of blocks is the number of hours in it.
    initial_date = DateTime(2006, 1, 1)
    final_date = initial_date + Dates.Month(num_stages - 1)
    dates = collect(initial_date:Dates.Month(1):final_date)
    num_blocks_per_stage = Int32.(Dates.daysinmonth.(dates) .* 24)
    max_blocks = maximum(num_blocks_per_stage)

    dimensions = ["stage", "scenario", "block"]
    labels = map(i -> "agent_$i", 1:num_time_series)
    time_dimension = "stage"
    dimension_size = [num_stages, num_scenarios, max_blocks]

    # Blocks beyond a stage's own length keep the initial zero.
    data = zeros(num_time_series, max_blocks, num_scenarios, num_stages)
    for stage in 1:num_stages, scenario in 1:num_scenarios
        for block in 1:num_blocks_per_stage[stage], i in 1:num_time_series
            data[i, block, scenario, stage] = stage + scenario + block + i
        end
    end

    Quiver.array_to_file(
        filename,
        data,
        impl;
        dimensions = dimensions,
        labels = labels,
        time_dimension = time_dimension,
        dimension_size = dimension_size,
        initial_date = initial_date,
        unit = " - ",
    )

    df = Quiver.file_to_df(filename, impl)

    # Expected shape: 4 stages * 3 scenarios * 744 blocks = 8928 rows, and
    # 3 dimension columns + 3 label columns = 6 columns.
    # NOTE: this assumes no row is dropped on read; it might be inaccurate, and
    # if it fails these two checks can be removed.
    @test DataFrames.nrow(df) == 8928
    @test DataFrames.ncol(df) == 6

    expected_metadata = (
        "time_dimension" => "stage",
        "dimensions" => ["stage", "scenario", "block"],
        "labels" => ["agent_1", "agent_2", "agent_3"],
    )
    for (key, expected) in expected_metadata
        @test DataFrames.metadata(df, key) == expected
    end
end

function test_read_write_implementations()
for impl in Quiver.implementations()
@testset "Read and Write $(impl)" begin
Expand All @@ -1132,6 +1182,7 @@ function test_read_write_implementations()
read_filtering_labels(impl)
read_write_out_of_order_kwargs(impl)
read_file_to_array(impl)
read_file_to_df(impl)
if impl == Quiver.csv
read_write_goto_csv_1()
read_write_goto_csv_2()
Expand Down

0 comments on commit 803a232

Please sign in to comment.