Add a fast way to transform a Quiver file directly into a DataFrame

psrenergy · Sep 23, 2024 · 803a232 · 803a232
1 parent f9112c2
commit 803a232
Show file tree

Hide file tree

Showing 4 changed files with 103 additions and 4 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "Quiver"
 uuid = "cdbb3f72-2527-4dbd-9d0e-93533a5519ac"
 authors = ["raphasampaio", "guilhermebodin"]
-version = "0.1.5"
+version = "0.1.6"
 
 [deps]
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"

diff --git a/src/metadata.jl b/src/metadata.jl
@@ -57,8 +57,8 @@ function Metadata(;
     return metadata
 end
 
-function to_toml(metadata::Metadata, filename::String)
-    dict_metadata = OrderedDict(
+function to_ordered_dict(metadata::Metadata)
+    return OrderedDict(
         "version" => metadata.version,
         "dimensions" => String.(metadata.dimensions),
         "dimension_size" => metadata.dimension_size,
@@ -68,6 +68,10 @@ function to_toml(metadata::Metadata, filename::String)
         "unit" => metadata.unit,
         "labels" => metadata.labels,
     )
+end
+
+function to_toml(metadata::Metadata, filename::String)
+    dict_metadata = to_ordered_dict(metadata)
     open(filename, "w") do io
         TOML.print(io, dict_metadata)
     end

diff --git a/src/reader.jl b/src/reader.jl
@@ -126,7 +126,6 @@ function file_to_array(
     reader = Reader{I}(
         filename;
         labels_to_read,
-        carrousel = false, # carrousel does not make sense in this implemetations
     )
 
     metadata = reader.metadata
@@ -147,4 +146,49 @@ function file_to_array(
     Quiver.close!(reader)
 
     return data, metadata
+end
+
+function file_to_df(
+    filename::String,
+    implementation::Type{I};
+    labels_to_read::Vector{String} = String[],
+) where {I <: Implementation}
+    reader = Reader{I}(
+        filename;
+        labels_to_read,
+    )
+
+    metadata = reader.metadata
+    dimension_names = reverse(metadata.dimensions)
+    dimension_sizes = reverse(metadata.dimension_size)
+
+    df = DataFrame()
+
+    # Add all columns to the DataFrame
+    for dim in metadata.dimensions
+        DataFrames.insertcols!(df, dim => Int[])
+    end
+    for label in reader.labels_to_read
+        DataFrames.insertcols!(df, label => Float32[])
+    end
+
+    for dims in Iterators.product([1:size for size in dimension_sizes]...)
+        dim_kwargs = OrderedDict(Symbol.(dimension_names) .=> dims)
+        Quiver.goto!(reader; dim_kwargs...)
+        if all(isnan.(reader.data))
+            continue
+        end
+        # Construct the data frame row by row
+        push!(df, [reverse(dims)...; reader.data...])
+    end
+
+    # Add metadata to DataFrame
+    orderec_dict_metadata = to_ordered_dict(metadata)
+    for (k, v) in orderec_dict_metadata
+        DataFrames.metadata!(df, k, v)
+    end
+
+    Quiver.close!(reader)
+
+    return df
 end
diff --git a/test/test_read_write.jl b/test/test_read_write.jl
@@ -2,6 +2,7 @@ module TestWriter
 
 using Test
 using Quiver
+using DataFrames
 using Dates
 
 function read_write_1(impl)
@@ -1116,6 +1117,55 @@ function read_file_to_array(impl)
     end
 end
 
+function read_file_to_df(impl)
+    filename = joinpath(@__DIR__, "test_read_file_to_df")
+
+    initial_date = DateTime(2006, 1, 1)
+    num_stages = 4
+    dates = collect(initial_date:Dates.Month(1):initial_date + Dates.Month(num_stages - 1))
+    num_scenarios = 3
+    num_blocks_per_stage = Int32.(Dates.daysinmonth.(dates) .* 24)
+    num_time_series = 3
+
+    dimensions = ["stage", "scenario", "block"]
+    labels = ["agent_$i" for i in 1:num_time_series]
+    time_dimension = "stage"
+    dimension_size = [num_stages, num_scenarios, maximum(num_blocks_per_stage)]
+
+    data = zeros(num_time_series, maximum(num_blocks_per_stage), num_scenarios, num_stages)
+    for stage in 1:num_stages
+        for scenario in 1:num_scenarios
+            for block in 1:num_blocks_per_stage[stage]
+                for i in 1:num_time_series
+                    data[i, block, scenario, stage] = stage + scenario + block + i
+                end
+            end
+        end
+    end
+
+    Quiver.array_to_file(
+        filename,
+        data,
+        impl;
+        dimensions,
+        labels,
+        time_dimension,
+        dimension_size,
+        initial_date,
+        unit = " - "
+    )
+
+    df = Quiver.file_to_df(filename, impl)
+
+    # This might be inaccurate; if it fails, these tests can be removed
+    @test size(df, 1) == 8928
+    @test size(df, 2) == 6
+
+    @test DataFrames.metadata(df, "time_dimension") == "stage"
+    @test DataFrames.metadata(df, "dimensions") == ["stage", "scenario", "block"]
+    @test DataFrames.metadata(df, "labels") == ["agent_1", "agent_2", "agent_3"]
+end
+
 function test_read_write_implementations()
     for impl in Quiver.implementations()
         @testset "Read and Write $(impl)" begin
@@ -1132,6 +1182,7 @@ function test_read_write_implementations()
             read_filtering_labels(impl)
             read_write_out_of_order_kwargs(impl)
             read_file_to_array(impl)
+            read_file_to_df(impl)
             if impl == Quiver.csv
                 read_write_goto_csv_1()
                 read_write_goto_csv_2()