Add method to read full array

psrenergy · May 17, 2024 · 27ff35e
1 parent 2d712e0
commit 27ff35e
Show file tree

Hide file tree

Showing 5 changed files with 94 additions and 38 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,39 @@
+name: CI
+on:
+  push:
+    branches: [master]
+  pull_request:
+    types: [opened, synchronize, reopened]
+jobs:
+  test:
+    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - version: '1'
+            os: windows-latest
+            arch: x64
+          - version: '1'
+            os: ubuntu-latest
+            arch: x64
+    steps:
+      - uses: actions/checkout@v2
+      - uses: julia-actions/setup-julia@v1
+        with:
+          version: ${{ matrix.version }}
+          arch: ${{ matrix.arch }}
+      - uses: actions/cache@v1
+        env:
+          cache-name: cache-artifacts
+        with:
+          path: ~/.julia/artifacts
+          key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
+          restore-keys: |
+            ${{ runner.os }}-test-${{ env.cache-name }}-
+            ${{ runner.os }}-test-
+            ${{ runner.os }}-
+      - uses: julia-actions/julia-buildpkg@v1
+      - uses: julia-actions/julia-runtest@v1
+      - uses: julia-actions/julia-processcoverage@v1
diff --git a/src/arrow.jl b/src/arrow.jl
@@ -81,6 +81,9 @@ end
 
 function _quiver_read_df(reader::QuiverReader{arrow, DataFrame}; kwargs...)
     dimensions_to_query = values(kwargs)
+    if isempty(dimensions_to_query)
+        return reader.reader
+    end
     indexes_to_search_at_dimension = Vector{UnitRange{Int}}(undef, length(dimensions_to_query) + 1)
     indexes_found_at_dimension = Vector{UnitRange{Int}}(undef, length(dimensions_to_query))
     indexes_to_search_at_dimension[1] = 1:size(reader.reader, 1)

diff --git a/src/csv.jl b/src/csv.jl
@@ -101,6 +101,9 @@ end
 
 function _quiver_read_df(reader::QuiverReader{csv, DataFrame}; kwargs...)
     dimensions_to_query = values(kwargs)
+    if isempty(dimensions_to_query)
+        return reader.reader
+    end
     indexes_to_search_at_dimension = Vector{UnitRange{Int}}(undef, length(dimensions_to_query) + 1)
     indexes_found_at_dimension = Vector{UnitRange{Int}}(undef, length(dimensions_to_query))
     indexes_to_search_at_dimension[1] = 1:size(reader.reader, 1)

diff --git a/src/writer.jl b/src/writer.jl
@@ -109,12 +109,6 @@ function _create_matrix_of_dimension_to_write(writer::QuiverWriter; provided_dim
     return dimensions
 end
 
-function write!(writer::QuiverWriter, agents::Matrix{F}; provided_dimensions...) where F <: AbstractFloat
-    dimensions = _create_matrix_of_dimension_to_write(writer; provided_dimensions...)
-    write!(writer, dimensions, agents)
-    return nothing
-end
-
 function write!(writer::QuiverWriter, agents::Array{F, N}; provided_dimensions...) where {F <: AbstractFloat, N}
     _assert_dimensions_are_in_order(writer; provided_dimensions...)
     # Create a matrix of Integers with the dimensions to write
@@ -124,26 +118,33 @@ function write!(writer::QuiverWriter, agents::Array{F, N}; provided_dimensions..
     max_dimension_per_not_provided_dimension = writer.metadata.maximum_value_of_each_dimension[indexes_of_dimensions_missing]
     number_of_rows = prod(max_dimension_per_not_provided_dimension)
     agent_array_sizes = size(agents)
-    number_of_agents = agent_array_sizes[end]
+    number_of_agents = agent_array_sizes[1]
 
     # Check if agent array sizes are compatible with the dimensions we need to provide.
-    for (i, s) in enumerate(agent_array_sizes[1:end-1])
+    for (i, s) in enumerate(agent_array_sizes[end:-1:2])
         if s != max_dimension_per_not_provided_dimension[i]
-            error("Expected dimensions are $([max_dimension_per_not_provided_dimension, number_of_agents]), provided array has $(agent_array_sizes).")
+            error("Expected dimensions are $([max_dimension_per_not_provided_dimension; number_of_agents]), provided array has $(agent_array_sizes).")
         end
     end
 
     # Reshape the array into a matrix in order to be mapped to a DataFrame in the correct dimensions
     number_of_rows = prod(max_dimension_per_not_provided_dimension)
 
-    # TODO this could be much more performatic. It currently allocates a lot. This 
-    # Could be written in one pass without creating the two copies it is currently doing.
-    perm = (collect(ndims(agents)-1:-1:1)..., ndims(agents))
-    permuted_agents = permutedims(agents, perm)
-    matrix_agents = reshape(permuted_agents, (number_of_rows, number_of_agents))
+    # Build dimensions matrix
+    dimensions = _create_matrix_of_dimension_to_write(writer; provided_dimensions...)
+    matrix_agents = zeros(Float32, number_of_rows, number_of_agents)
+    correct_index = zeros(Int, length(indexes_of_dimensions_missing))
+    for ag in 1:number_of_agents
+        for i in 1:number_of_rows
+            for (j, dim) in enumerate(Iterators.reverse(indexes_of_dimensions_missing))
+                correct_index[j] = dimensions[i, dim]
+            end
+            matrix_agents[i, ag] = agents[ag, correct_index...]
+        end
+    end
 
     # Pass to the next function to build the dimensions and write it.
-    Quiver.write!(writer, matrix_agents; provided_dimensions...)
+    Quiver.write!(writer, dimensions, matrix_agents)
     return nothing
 end
 

diff --git a/test/test_read_write.jl b/test/test_read_write.jl
@@ -54,8 +54,8 @@ function read_write_with_implementation(impl)
     Quiver.close!(reader)
 end
 
-function read_write_with_implementation_not_passing_dimensions(impl)
-    filename = joinpath(@__DIR__, "test_read_write_not_passing_dimensions")
+function read_write_with_implementation_passing_array(impl)
+    filename = joinpath(@__DIR__, "test_read_write_with_implementation_passing_array")
     initial_date = DateTime(2006, 1, 1)
     num_stages = 10
     num_scenarios = 12
@@ -74,8 +74,14 @@ function read_write_with_implementation_not_passing_dimensions(impl)
         [num_stages, num_scenarios, num_blocks];
         initial_date = initial_date
     )
+
+    agents = zeros(Float32, num_agents, num_blocks, num_scenarios)
     for stage in 1:num_stages
-        agents = stage * ones(Float32, num_scenarios * num_blocks, num_agents)
+        for scenario in 1:num_scenarios
+            for block in 1:num_blocks
+                agents[:, block, scenario] .= stage * scenario
+            end
+        end
         Quiver.write!(writer, agents; stage = stage)
     end
     Quiver.close!(writer)
@@ -87,14 +93,14 @@ function read_write_with_implementation_not_passing_dimensions(impl)
         for scenario in 1:num_scenarios
             result = Quiver.read(reader; stage = stage, scenario = scenario)
             @test size(result) == (num_blocks, num_agents)
-            @test unique(result)[1] == stage
+            @test unique(result)[1] == stage * scenario
         end
     end
     Quiver.close!(reader)
 end
 
-function read_write_with_implementation_passing_array(impl)
-    filename = joinpath(@__DIR__, "test_read_write_with_implementation_passing_array")
+function read_write_with_implementation_passing_full_array(impl)
+    filename = joinpath(@__DIR__, "test_read_write_with_implementation_passing_full_array")
     initial_date = DateTime(2006, 1, 1)
     num_stages = 10
     num_scenarios = 12
@@ -114,62 +120,66 @@ function read_write_with_implementation_passing_array(impl)
         initial_date = initial_date
     )
 
-    agents = zeros(Float32, num_scenarios, num_blocks, num_agents)
+    agents = zeros(Float32, num_agents, num_blocks, num_scenarios, num_stages)
     for stage in 1:num_stages
         for scenario in 1:num_scenarios
             for block in 1:num_blocks
-                agents[scenario, block, :] .= stage * scenario
+                for ag in 1:num_agents
+                    agents[ag, block, scenario, stage] = ag * block * scenario * stage
+                end
             end
         end
-        Quiver.write!(writer, agents; stage = stage)
     end
+    Quiver.write!(writer, agents)
     Quiver.close!(writer)
 
     reader = QuiverReader{impl}(filename);
-    num_stages = Quiver.max_index(reader, "stage")
-    num_scenarios = Quiver.max_index(reader, "scenario")
+    result = Quiver.read(reader)
+    row = 1
     for stage in 1:num_stages
         for scenario in 1:num_scenarios
-            result = Quiver.read(reader; stage = stage, scenario = scenario)
-            @test size(result) == (num_blocks, num_agents)
-            @test unique(result)[1] == stage * scenario
+            for block in 1:num_blocks
+                @test agents[:, block, scenario, stage] == result[row, :]
+                row += 1
+            end
         end
     end
     Quiver.close!(reader)
 end
 
+
 function test_read_write()
     for impl in Quiver.implementations()
         read_write_with_implementation(impl)
     end
-    # read_write_with_implementation(Quiver.csv)
     # Windows has some kind of problem releasing the Arrow file mmaped
     GC.gc()
     GC.gc()
     rm(joinpath(@__DIR__, "test_read_write.arrow"))
     rm(joinpath(@__DIR__, "test_read_write.csv"))
 end
 
-function test_read_write_not_passing_dimensions()
+function test_read_write_with_implementation_passing_array()
     for impl in Quiver.implementations()
-        read_write_with_implementation_not_passing_dimensions(impl)
+        read_write_with_implementation_passing_array(impl)
     end
     # Windows has some kind of problem releasing the Arrow file mmaped
     GC.gc()
     GC.gc()
-    rm(joinpath(@__DIR__, "test_read_write_not_passing_dimensions.arrow"))
-    rm(joinpath(@__DIR__, "test_read_write_not_passing_dimensions.csv"))
+    rm(joinpath(@__DIR__, "test_read_write_with_implementation_passing_array.arrow"))
+    rm(joinpath(@__DIR__, "test_read_write_with_implementation_passing_array.csv"))
 end
 
-function test_read_write_with_implementation_passing_array()
+
+function test_read_write_with_implementation_passing_full_array()
     for impl in Quiver.implementations()
-        read_write_with_implementation_passing_array(impl)
+        read_write_with_implementation_passing_full_array(impl)
     end
     # Windows has some kind of problem releasing the Arrow file mmaped
     GC.gc()
     GC.gc()
-    rm(joinpath(@__DIR__, "test_read_write_with_implementation_passing_array.arrow"))
-    rm(joinpath(@__DIR__, "test_read_write_with_implementation_passing_array.csv"))
+    rm(joinpath(@__DIR__, "test_read_write_with_implementation_passing_full_array.arrow"))
+    rm(joinpath(@__DIR__, "test_read_write_with_implementation_passing_full_array.csv"))
 end
 
 function runtests()