Skip to content

Commit

Permalink
Add method to read full array
Browse files Browse the repository at this point in the history
  • Loading branch information
guilhermebodin committed May 17, 2024
1 parent 2d712e0 commit 27ff35e
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 38 deletions.
39 changes: 39 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Continuous integration: build the package, run its tests and process coverage.
name: CI
on:
  # Run on pushes to master and on pull-request activity.
  push:
    branches: [master]
  pull_request:
    types: [opened, synchronize, reopened]
jobs:
  test:
    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
    runs-on: ${{ matrix.os }}
    strategy:
      # Let the remaining matrix entries finish even when one of them fails.
      fail-fast: false
      matrix:
        include:
          - version: '1'
            os: windows-latest
            arch: x64
          - version: '1'
            os: ubuntu-latest
            arch: x64
    steps:
      # checkout@v2 runs on a deprecated Node runtime; v4 is the maintained release.
      - uses: actions/checkout@v4
      - uses: julia-actions/setup-julia@v1
        with:
          version: ${{ matrix.version }}
          arch: ${{ matrix.arch }}
      # actions/cache v1/v2 have been retired by GitHub; v4 accepts the same inputs.
      - uses: actions/cache@v4
        env:
          cache-name: cache-artifacts
        with:
          path: ~/.julia/artifacts
          # Key on the hash of every Project.toml; fall back to broader prefixes.
          key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
          restore-keys: |
            ${{ runner.os }}-test-${{ env.cache-name }}-
            ${{ runner.os }}-test-
            ${{ runner.os }}-
      - uses: julia-actions/julia-buildpkg@v1
      - uses: julia-actions/julia-runtest@v1
      - uses: julia-actions/julia-processcoverage@v1
3 changes: 3 additions & 0 deletions src/arrow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ end

function _quiver_read_df(reader::QuiverReader{arrow, DataFrame}; kwargs...)
dimensions_to_query = values(kwargs)
if isempty(dimensions_to_query)
return reader.reader
end
indexes_to_search_at_dimension = Vector{UnitRange{Int}}(undef, length(dimensions_to_query) + 1)
indexes_found_at_dimension = Vector{UnitRange{Int}}(undef, length(dimensions_to_query))
indexes_to_search_at_dimension[1] = 1:size(reader.reader, 1)
Expand Down
3 changes: 3 additions & 0 deletions src/csv.jl
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ end

function _quiver_read_df(reader::QuiverReader{csv, DataFrame}; kwargs...)
dimensions_to_query = values(kwargs)
if isempty(dimensions_to_query)
return reader.reader
end
indexes_to_search_at_dimension = Vector{UnitRange{Int}}(undef, length(dimensions_to_query) + 1)
indexes_found_at_dimension = Vector{UnitRange{Int}}(undef, length(dimensions_to_query))
indexes_to_search_at_dimension[1] = 1:size(reader.reader, 1)
Expand Down
31 changes: 16 additions & 15 deletions src/writer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,6 @@ function _create_matrix_of_dimension_to_write(writer::QuiverWriter; provided_dim
return dimensions
end

# Matrix method of `write!`: builds the dimension matrix from the keyword
# dimensions with `_create_matrix_of_dimension_to_write` and forwards it, together
# with `agents`, to the three-argument `write!`. Always returns `nothing`.
function write!(
    writer::QuiverWriter,
    agents::Matrix{F};
    provided_dimensions...,
) where {F <: AbstractFloat}
    write!(
        writer,
        _create_matrix_of_dimension_to_write(writer; provided_dimensions...),
        agents,
    )
    return nothing
end

function write!(writer::QuiverWriter, agents::Array{F, N}; provided_dimensions...) where {F <: AbstractFloat, N}
_assert_dimensions_are_in_order(writer; provided_dimensions...)
# Create a matrix of Integers with the dimensions to write
Expand All @@ -124,26 +118,33 @@ function write!(writer::QuiverWriter, agents::Array{F, N}; provided_dimensions..
max_dimension_per_not_provided_dimension = writer.metadata.maximum_value_of_each_dimension[indexes_of_dimensions_missing]
number_of_rows = prod(max_dimension_per_not_provided_dimension)
agent_array_sizes = size(agents)
number_of_agents = agent_array_sizes[end]
number_of_agents = agent_array_sizes[1]

# Check if agent array sizes are compatible with the dimensions we need to provide.
for (i, s) in enumerate(agent_array_sizes[1:end-1])
for (i, s) in enumerate(agent_array_sizes[end:-1:2])
if s != max_dimension_per_not_provided_dimension[i]
error("Expected dimensions are $([max_dimension_per_not_provided_dimension, number_of_agents]), provided array has $(agent_array_sizes).")
error("Expected dimensions are $([max_dimension_per_not_provided_dimension; number_of_agents]), provided array has $(agent_array_sizes).")
end
end

# Reshape the array into a matrix in order to be mapped to a DataFrame in the correct dimensions
number_of_rows = prod(max_dimension_per_not_provided_dimension)

# TODO: this could be much more performant. It currently allocates a lot; it
# could be written in one pass without creating the two copies it currently makes.
perm = (collect(ndims(agents)-1:-1:1)..., ndims(agents))
permuted_agents = permutedims(agents, perm)
matrix_agents = reshape(permuted_agents, (number_of_rows, number_of_agents))
# Build dimensions matrix
dimensions = _create_matrix_of_dimension_to_write(writer; provided_dimensions...)
matrix_agents = zeros(Float32, number_of_rows, number_of_agents)
correct_index = zeros(Int, length(indexes_of_dimensions_missing))
for ag in 1:number_of_agents
for i in 1:number_of_rows
for (j, dim) in enumerate(Iterators.reverse(indexes_of_dimensions_missing))
correct_index[j] = dimensions[i, dim]
end
matrix_agents[i, ag] = agents[ag, correct_index...]
end
end

# Pass to the next function to build the dimensions and write it.
Quiver.write!(writer, matrix_agents; provided_dimensions...)
Quiver.write!(writer, dimensions, matrix_agents)
return nothing
end

Expand Down
56 changes: 33 additions & 23 deletions test/test_read_write.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ function read_write_with_implementation(impl)
Quiver.close!(reader)
end

function read_write_with_implementation_not_passing_dimensions(impl)
filename = joinpath(@__DIR__, "test_read_write_not_passing_dimensions")
function read_write_with_implementation_passing_array(impl)
filename = joinpath(@__DIR__, "test_read_write_with_implementation_passing_array")
initial_date = DateTime(2006, 1, 1)
num_stages = 10
num_scenarios = 12
Expand All @@ -74,8 +74,14 @@ function read_write_with_implementation_not_passing_dimensions(impl)
[num_stages, num_scenarios, num_blocks];
initial_date = initial_date
)

agents = zeros(Float32, num_agents, num_blocks, num_scenarios)
for stage in 1:num_stages
agents = stage * ones(Float32, num_scenarios * num_blocks, num_agents)
for scenario in 1:num_scenarios
for block in 1:num_blocks
agents[:, block, scenario] .= stage * scenario
end
end
Quiver.write!(writer, agents; stage = stage)
end
Quiver.close!(writer)
Expand All @@ -87,14 +93,14 @@ function read_write_with_implementation_not_passing_dimensions(impl)
for scenario in 1:num_scenarios
result = Quiver.read(reader; stage = stage, scenario = scenario)
@test size(result) == (num_blocks, num_agents)
@test unique(result)[1] == stage
@test unique(result)[1] == stage * scenario
end
end
Quiver.close!(reader)
end

function read_write_with_implementation_passing_array(impl)
filename = joinpath(@__DIR__, "test_read_write_with_implementation_passing_array")
function read_write_with_implementation_passing_full_array(impl)
filename = joinpath(@__DIR__, "test_read_write_with_implementation_passing_full_array")
initial_date = DateTime(2006, 1, 1)
num_stages = 10
num_scenarios = 12
Expand All @@ -114,62 +120,66 @@ function read_write_with_implementation_passing_array(impl)
initial_date = initial_date
)

agents = zeros(Float32, num_scenarios, num_blocks, num_agents)
agents = zeros(Float32, num_agents, num_blocks, num_scenarios, num_stages)
for stage in 1:num_stages
for scenario in 1:num_scenarios
for block in 1:num_blocks
agents[scenario, block, :] .= stage * scenario
for ag in 1:num_agents
agents[ag, block, scenario, stage] = ag * block * scenario * stage
end
end
end
Quiver.write!(writer, agents; stage = stage)
end
Quiver.write!(writer, agents)
Quiver.close!(writer)

reader = QuiverReader{impl}(filename);
num_stages = Quiver.max_index(reader, "stage")
num_scenarios = Quiver.max_index(reader, "scenario")
result = Quiver.read(reader)
row = 1
for stage in 1:num_stages
for scenario in 1:num_scenarios
result = Quiver.read(reader; stage = stage, scenario = scenario)
@test size(result) == (num_blocks, num_agents)
@test unique(result)[1] == stage * scenario
for block in 1:num_blocks
@test agents[:, block, scenario, stage] == result[row, :]
row += 1
end
end
end
Quiver.close!(reader)
end


# Runs `read_write_with_implementation` for every entry returned by
# `Quiver.implementations()`, then deletes the `test_read_write.arrow` and
# `test_read_write.csv` files that sit next to this test file.
function test_read_write()
    foreach(read_write_with_implementation, Quiver.implementations())
    # Two garbage-collection passes before deleting: Windows has some kind of
    # problem releasing the memory-mapped Arrow file.
    for _ in 1:2
        GC.gc()
    end
    for generated_file in ("test_read_write.arrow", "test_read_write.csv")
        rm(joinpath(@__DIR__, generated_file))
    end
    return nothing
end

function test_read_write_not_passing_dimensions()
function test_read_write_with_implementation_passing_array()
for impl in Quiver.implementations()
read_write_with_implementation_not_passing_dimensions(impl)
read_write_with_implementation_passing_array(impl)
end
# Windows has some kind of problem releasing the memory-mapped Arrow file
GC.gc()
GC.gc()
rm(joinpath(@__DIR__, "test_read_write_not_passing_dimensions.arrow"))
rm(joinpath(@__DIR__, "test_read_write_not_passing_dimensions.csv"))
rm(joinpath(@__DIR__, "test_read_write_with_implementation_passing_array.arrow"))
rm(joinpath(@__DIR__, "test_read_write_with_implementation_passing_array.csv"))
end

function test_read_write_with_implementation_passing_array()

function test_read_write_with_implementation_passing_full_array()
for impl in Quiver.implementations()
read_write_with_implementation_passing_array(impl)
read_write_with_implementation_passing_full_array(impl)
end
# Windows has some kind of problem releasing the memory-mapped Arrow file
GC.gc()
GC.gc()
rm(joinpath(@__DIR__, "test_read_write_with_implementation_passing_array.arrow"))
rm(joinpath(@__DIR__, "test_read_write_with_implementation_passing_array.csv"))
rm(joinpath(@__DIR__, "test_read_write_with_implementation_passing_full_array.arrow"))
rm(joinpath(@__DIR__, "test_read_write_with_implementation_passing_full_array.csv"))
end

function runtests()
Expand Down

0 comments on commit 27ff35e

Please sign in to comment.