From 3e2eab8106cfdc6a03049136ccb5e8547967a0eb Mon Sep 17 00:00:00 2001 From: guilhermebodin Date: Mon, 23 Sep 2024 17:35:43 -0300 Subject: [PATCH] Add more comments in README --- README.md | 18 +++++++++++++----- src/reader.jl | 32 ++++++++++++++++++++++++++++++++ test/test_read_write.jl | 6 ++++++ 3 files changed, 51 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 83c6817..bed20f0 100644 --- a/README.md +++ b/README.md @@ -6,10 +6,11 @@ Quiver is not the fastest data-structure for time series data, but it is designe is to have a set of dimensions that can be used to index the data and a set of values from the time serires attributes. This allows to have a table-like data-structure that can be used to store time series data. -Files that follow the Quiver implementation can be stored in any format that maps directly to a table-like structure with metadata. -CSV files are implemented in a way that the first few lines are used to store the metadata and the rest of the file is used to store the data., i.e. +Files that follow the Quiver implementation can be stored in any format that maps directly to a table-like structure with metadata. The metadata stores the frequency of the time series, the initial date, the unit of the data, the number of dimensions, the maximum value of each dimension, the time dimension and the version of the file. -```csv +The metadata is always stored in a TOML file in the following format: + +```toml version = 1 dimensions = ["stage", "scenario", "block"] dimension_size = [10, 12, 744] @@ -18,11 +19,18 @@ time_dimension = "stage" frequency = "month" unit = "" labels = ["agent_1", "agent_2", "agent_3"] ---- +``` + +And the data is stored in a csv or binary file that contains the values of the time series. 
The csv format is as follows: +```csv stage,scenario,block,agent_1,agent_2,agent_3 1,1,1,1.0,1.0,1.0 1,1,2,1.0,1.0,1.0 1,1,3,1.0,1.0,1.0 ``` -The metadata stores the frequency of the time series, the initial date, the unit of the data, the number of the dimension, the maximum value of each dimension, the time dimension and the version of the file. \ No newline at end of file +## Installation + +```julia +pkg> add Quiver +``` \ No newline at end of file diff --git a/src/reader.jl b/src/reader.jl index aec83ff..1cd35ce 100644 --- a/src/reader.jl +++ b/src/reader.jl @@ -80,6 +80,14 @@ function _move_data_from_buffer_cache_to_data!(reader::Reader) return nothing end +""" + goto!( + reader::Reader; + dims... + ) + +Move the reader to the specified dimensions and return the data. +""" function goto!(reader::Reader; dims...) validate_dimensions(reader.metadata, dims...) _build_dimension_to_read!(reader; dims...) @@ -89,12 +97,22 @@ function goto!(reader::Reader; dims...) return reader.data end +""" + next_dimension!(reader::Reader) + +Move the reader to the next dimension and return the data. +""" function next_dimension!(reader::Reader) _quiver_next_dimension!(reader) _move_data_from_buffer_cache_to_data!(reader) return reader.data end +""" + max_index(reader::Reader, dimension::String) + +Return the maximum index of the specified dimension. +""" function max_index(reader::Reader, dimension::String) symbol_dim = Symbol(dimension) index = findfirst(isequal(symbol_dim), reader.metadata.dimensions) @@ -104,6 +122,11 @@ function max_index(reader::Reader, dimension::String) return reader.metadata.dimension_size[index] end +""" + close!(reader::Reader) + +Close the reader. 
+""" function close!(reader::Reader) _quiver_close!(reader) return nothing @@ -148,6 +171,15 @@ function file_to_array( return data, metadata end +""" + file_to_df( + filename::String, + implementation::Type{I}; + labels_to_read::Vector{String} = String[], + ) where {I <: Implementation} + +Reads a file and returns the data and metadata as a DataFrame. +""" function file_to_df( filename::String, implementation::Type{I}; diff --git a/test/test_read_write.jl b/test/test_read_write.jl index 21b4b4c..21e05f1 100644 --- a/test/test_read_write.jl +++ b/test/test_read_write.jl @@ -1115,6 +1115,9 @@ function read_file_to_array(impl) for i in eachindex(data) @test data[i] == data_read[i] end + + rm("$filename.$(Quiver.file_extension(impl))") + rm("$filename.toml") end function read_file_to_df(impl) @@ -1164,6 +1167,9 @@ function read_file_to_df(impl) @test DataFrames.metadata(df, "time_dimension") == "stage" @test DataFrames.metadata(df, "dimensions") == ["stage", "scenario", "block"] @test DataFrames.metadata(df, "labels") == ["agent_1", "agent_2", "agent_3"] + + rm("$filename.$(Quiver.file_extension(impl))") + rm("$filename.toml") end function test_read_write_implementations()