diff --git a/src/binary.jl b/src/binary.jl index bbf6dcc..b045e3d 100644 --- a/src/binary.jl +++ b/src/binary.jl @@ -107,14 +107,16 @@ function Reader{binary}( io = open(filename_with_extensions, "r") - last_dimension_read = zeros(Int, metadata.number_of_dimensions) + dimension_in_cache = zeros(Int, metadata.number_of_dimensions) + dimension_to_read = zeros(Int, metadata.number_of_dimensions) reader = try Quiver.Reader{binary}( io, filename, metadata, - last_dimension_read; + dimension_in_cache, + dimension_to_read; labels_to_read = isempty(labels_to_read) ? metadata.labels : labels_to_read, carrousel = carrousel, ) @@ -127,7 +129,7 @@ function Reader{binary}( end function _quiver_goto!(reader::Quiver.Reader{binary}) - next_pos = _calculate_position_in_file(reader.metadata, reader.last_dimension_read...) + next_pos = _calculate_position_in_file(reader.metadata, reader.dimension_to_read...) # Check if we need to seek a new position or write directly in the io # This is absolutely necessary for performance in the binary operation current_pos = position(reader.reader) diff --git a/src/csv.jl b/src/csv.jl index 4653dae..e21d2ab 100644 --- a/src/csv.jl +++ b/src/csv.jl @@ -80,7 +80,8 @@ function Reader{csv}( reusebuffer = true, ) - last_dimension_read = zeros(Int, metadata.number_of_dimensions) + dimension_in_cache = zeros(Int, metadata.number_of_dimensions) + dimension_to_read = zeros(Int, metadata.number_of_dimensions) next = iterate(rows) (row, state) = next @@ -92,7 +93,8 @@ function Reader{csv}( row_reader, filename, metadata, - last_dimension_read; + dimension_in_cache, + dimension_to_read; labels_to_read = isempty(labels_to_read) ? metadata.labels : labels_to_read, carrousel = carrousel, ) @@ -111,7 +113,7 @@ function _quiver_next_dimension!(reader::Quiver.Reader{csv}) end (row, state) = reader.reader.next for (i, dim) in enumerate(reader.metadata.dimensions) - reader.last_dimension_read[i] = row[dim] + reader.dimension_to_read[i] = row[dim] end for (i, ts) in enumerate(reader.metadata.labels) reader.all_labels_data_cache[i] = row[Symbol(ts)] @@ -151,38 +153,41 @@ function _quiver_goto!(reader::Quiver.Reader{csv}) return nothing end + dimension_in_cache = reader.dimension_in_cache + dimension_to_read = reader.dimension_to_read dimension_in_iterator = _current_dimension_in_iterator(reader) - dimension_to_read = reader.last_dimension_read - order_of_dimension_in_iterator = _calculate_order_in_file(reader.metadata, dimension_in_iterator...) - order_of_dimension_to_read = _calculate_order_in_file(reader.metadata, dimension_to_read...) - - if order_of_dimension_in_iterator > order_of_dimension_to_read - error("Cannot read a dimension that is posterior to the current dimension") - elseif order_of_dimension_in_iterator == order_of_dimension_to_read + if dimension_to_read == reader.metadata.dimension_size (row, state) = reader.reader.next - is_first_index = true for (i, dim) in enumerate(reader.metadata.dimensions) - reader.last_dimension_read[i] = row[dim] - is_first_index = is_first_index && row[dim] == 1 + reader.dimension_to_read[i] = row[dim] end - for (i, ts) in enumerate(reader.metadata.labels) - if is_first_index - reader.all_labels_data_cache[i] = row[Symbol(ts)] - else + reader.all_labels_data_cache[i] = row[Symbol(ts)] + end + return nothing + end + + order_of_dimension_in_cache = _calculate_order_in_file(reader.metadata, dimension_in_cache...) + order_of_dimension_to_read = _calculate_order_in_file(reader.metadata, dimension_to_read...) + order_of_dimension_in_iterator = _calculate_order_in_file(reader.metadata, dimension_in_iterator...) + + if order_of_dimension_in_cache > order_of_dimension_to_read + error("Cannot read a dimension that is prior to the last dimension read") + elseif order_of_dimension_in_cache < order_of_dimension_to_read + if order_of_dimension_in_iterator > order_of_dimension_to_read + for (i, ts) in enumerate(reader.metadata.labels) reader.all_labels_data_cache[i] = NaN end + else + while order_of_dimension_in_iterator <= order_of_dimension_to_read + _quiver_next_dimension!(reader) + dimension_in_iterator = _current_dimension_in_iterator(reader) + order_of_dimension_in_iterator = _calculate_order_in_file(reader.metadata, dimension_in_iterator...) + end end - - _quiver_next_dimension!(reader) - return nothing else - while order_of_dimension_in_iterator <= order_of_dimension_to_read - _quiver_next_dimension!(reader) - dimension_in_iterator = _current_dimension_in_iterator(reader) - order_of_dimension_in_iterator = _calculate_order_in_file(reader.metadata, dimension_in_iterator...) - end + return nothing end return nothing end @@ -214,7 +219,7 @@ function convert( while reader.reader.next !== nothing Quiver.next_dimension!(reader) - dim_kwargs = OrderedDict(Symbol.(metadata.dimensions) .=> reader.last_dimension_read) + dim_kwargs = OrderedDict(Symbol.(metadata.dimensions) .=> reader.dimension_to_read) Quiver.write!(writer, reader.data; dim_kwargs...) end diff --git a/src/reader.jl b/src/reader.jl index 8a7d9b6..0328568 100644 --- a/src/reader.jl +++ b/src/reader.jl @@ -2,7 +2,8 @@ mutable struct Reader{I <: Implementation, R} reader::R filename::String metadata::Metadata - last_dimension_read::Vector{Int} + dimension_in_cache::Vector{Int} + dimension_to_read::Vector{Int} all_labels_data_cache::Vector{Float32} data::Vector{Float32} labels_to_read::Vector{String} @@ -12,7 +13,8 @@ mutable struct Reader{I <: Implementation, R} reader::R, filename::String, metadata::Metadata, - last_dimension_read::Vector{Int}; + dimension_in_cache::Vector{Int}, + dimension_to_read::Vector{Int}; labels_to_read::Vector{String} = metadata.labels, carrousel::Bool = false, ) where {I, R} @@ -40,7 +42,8 @@ mutable struct Reader{I <: Implementation, R} reader, filename, metadata, - last_dimension_read, + dimension_in_cache, + dimension_to_read, all_labels_data_cache, data, labels_to_read, @@ -52,17 +55,24 @@ mutable struct Reader{I <: Implementation, R} end end -function _build_last_dimension_read!(reader::Reader; dims...) +function _build_dimension_to_read!(reader::Reader; dims...) for (i, dim) in enumerate(reader.metadata.dimensions) if reader.carrousel - reader.last_dimension_read[i] = mod1(dims[dim], reader.metadata.dimension_size[i]) + reader.dimension_to_read[i] = mod1(dims[dim], reader.metadata.dimension_size[i]) else - reader.last_dimension_read[i] = dims[dim] + reader.dimension_to_read[i] = dims[dim] end end return nothing end +function _build_dimension_in_cache!(reader::Reader) + for i in 1:reader.metadata.number_of_dimensions + reader.dimension_in_cache[i] = reader.dimension_to_read[i] + end + return nothing +end + function _move_data_from_buffer_cache_to_data!(reader::Reader) @inbounds for (i, index) in enumerate(reader.indices_of_labels_to_read) reader.data[i] = reader.all_labels_data_cache[index] @@ -72,8 +82,9 @@ end function goto!(reader::Reader; dims...) validate_dimensions(reader.metadata, dims...) - _build_last_dimension_read!(reader; dims...) + _build_dimension_to_read!(reader; dims...) _quiver_goto!(reader) + _build_dimension_in_cache!(reader) _move_data_from_buffer_cache_to_data!(reader) return reader.data end diff --git a/test/test_read_write.jl b/test/test_read_write.jl index 6bed0d5..1ea18fb 100644 --- a/test/test_read_write.jl +++ b/test/test_read_write.jl @@ -448,9 +448,127 @@ function read_write_goto_csv_3() for block in 1:num_blocks for segment in 1:num_segments_per_block_scenario[block, scenario] data = [stage, scenario, block + segment] - if stage == scenario == block == segment == 1 + Quiver.write!(writer, data; stage, scenario, block, segment) + end + end + end + end + + Quiver.close!(writer) + + reader = Quiver.Reader{Quiver.csv}(filename) + for stage in 1:num_stages + for scenario in 1:num_scenarios + for block in 1:num_blocks + for segment in 1:max_num_segments + Quiver.goto!(reader; stage, scenario, block, segment) + if all(isnan.(reader.data)) + continue + end + @test reader.data == [stage, scenario, block + segment] + end + end + end + end + + Quiver.close!(reader) + + rm("$filename.$(Quiver.file_extension(Quiver.csv))") + rm("$filename.toml") +end + +function read_write_goto_csv_4() + filename = joinpath(@__DIR__, "test_read_goto_csv_4") + + initial_date = DateTime(2006, 1, 1) + num_stages = 10 + num_scenarios = 12 + num_blocks = 24 + num_segments_per_block_scenario = [s + b for b in 1:num_blocks, s in 1:num_scenarios] + max_num_segments = maximum(num_segments_per_block_scenario) + num_time_series = 3 + + dimensions = ["stage", "scenario", "block", "segment"] + labels = ["agent_$i" for i in 1:num_time_series] + time_dimension = "stage" + dimension_size = [num_stages, num_scenarios, num_blocks, max_num_segments] + + writer = Quiver.Writer{Quiver.csv}( + filename; + dimensions, + labels, + time_dimension, + dimension_size, + initial_date = initial_date + ) + + for stage in 1:num_stages + for scenario in 1:num_scenarios + for block in 1:num_blocks + for segment in 1:num_segments_per_block_scenario[block, scenario] + data = [stage, scenario, block + segment] + Quiver.write!(writer, data; stage, scenario, block, segment) + end + end + end + end + + Quiver.close!(writer) + + reader = Quiver.Reader{Quiver.csv}(filename) + for stage in 1:num_stages + for scenario in 1:num_scenarios + for block in 1:num_blocks + for segment in 1:max_num_segments + Quiver.goto!(reader; stage, scenario, block, segment) + if iseven(scenario) + continue + end + if all(isnan.(reader.data)) continue end + @test reader.data == [stage, scenario, block + segment] + end + end + end + end + + Quiver.close!(reader) + + rm("$filename.$(Quiver.file_extension(Quiver.csv))") + rm("$filename.toml") +end + +function read_write_goto_csv_5() + filename = joinpath(@__DIR__, "test_read_goto_csv_5") + + initial_date = DateTime(2006, 1, 1) + num_stages = 1 + num_scenarios = 1 + num_blocks = 1 + num_segments_per_block_scenario = [s + b for b in 1:num_blocks, s in 1:num_scenarios] + max_num_segments = maximum(num_segments_per_block_scenario) + num_time_series = 3 + + dimensions = ["stage", "scenario", "block", "segment"] + labels = ["agent_$i" for i in 1:num_time_series] + time_dimension = "stage" + dimension_size = [num_stages, num_scenarios, num_blocks, max_num_segments] + + writer = Quiver.Writer{Quiver.csv}( + filename; + dimensions, + labels, + time_dimension, + dimension_size, + initial_date = initial_date + ) + + for stage in 1:num_stages + for scenario in 1:num_scenarios + for block in 1:num_blocks + for segment in 1:num_segments_per_block_scenario[block, scenario] + data = [stage, scenario, block + segment] Quiver.write!(writer, data; stage, scenario, block, segment) end end @@ -460,7 +578,11 @@ function read_write_goto_csv_3() Quiver.close!(writer) reader = Quiver.Reader{Quiver.csv}(filename) - @test_throws ErrorException Quiver.goto!(reader; stage = 1, scenario = 1, block = 1, segment = 1) + stage = scenario = block = segment = 1 + Quiver.goto!(reader; stage, scenario, block, segment) + @test reader.data == [stage, scenario, block + segment] + Quiver.goto!(reader; stage, scenario, block, segment) + @test reader.data == [stage, scenario, block + segment] Quiver.close!(reader) @@ -1014,6 +1136,8 @@ function test_read_write_implementations() read_write_goto_csv_1() read_write_goto_csv_2() read_write_goto_csv_3() + read_write_goto_csv_4() + read_write_goto_csv_5() end end end