Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update branch #21

Merged
merged 15 commits into from
Sep 16, 2024
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Quiver"
uuid = "cdbb3f72-2527-4dbd-9d0e-93533a5519ac"
authors = ["raphasampaio", "guilhermebodin"]
version = "0.1.2"
version = "0.1.5"

[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
Expand Down
2 changes: 2 additions & 0 deletions src/Quiver.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,6 @@ include("reader.jl")
include("csv.jl")
include("binary.jl")

include("merge.jl")

end
2 changes: 1 addition & 1 deletion src/binary.jl
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ function _quiver_goto!(reader::Quiver.Reader{binary})
end

# Sequential iteration is not supported by the `binary` implementation.
# This method never returns: it always raises an `ErrorException` whose message
# points the caller to `goto!`, the supported way of positioning a binary reader.
function _quiver_next_dimension!(reader::Quiver.Reader{binary})
    error(
        "`next_dimension!`: not implemented for the `binary` implementation, use `goto!` instead.",
    )
end

Expand Down
69 changes: 65 additions & 4 deletions src/csv.jl
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,74 @@ function _quiver_next_dimension!(reader::Quiver.Reader{csv})
for (i, ts) in enumerate(reader.metadata.labels)
reader.all_labels_data_cache[i] = row[Symbol(ts)]
end
next = iterate(reader.reader.iterator, state)
reader.reader.next = next
reader.reader.next = iterate(reader.reader.iterator, state)
return nothing
end

function _quiver_goto!(reader::Quiver.Reader{csv}, dims...)
error("_quiver_goto! not implemented for csv")
# Compute the zero-based ordinal of the dimension tuple `dims` within the file.
# Every dimension except the last contributes `(index - 1)` multiplied by the
# product of the sizes of all dimensions that come after it; the last dimension
# contributes `(index - 1)` directly.
function _calculate_order_in_file(metadata::Quiver.Metadata, dims...)
    num_dims = metadata.number_of_dimensions
    offset = 0
    for k in 1:(num_dims - 1)
        # Number of rows spanned by a single step of dimension `k`.
        stride = performant_product_from_index_i_to_j(
            metadata.dimension_size,
            k + 1,
            num_dims,
        )
        offset += (dims[k] - 1) * stride
    end
    offset += dims[end] - 1
    return offset
end

# Return the dimension indices of the row the CSV iterator is currently holding
# in `reader.reader.next`, without consuming that row.
# Raises an error when the iterator is exhausted (`next === nothing`).
function _current_dimension_in_iterator(reader::Quiver.Reader{csv})
    pending = reader.reader.next
    if pending === nothing
        error("No more data to read")
    end
    row, _ = pending
    indices = zeros(Int, reader.metadata.number_of_dimensions)
    for (slot, name) in enumerate(reader.metadata.dimensions)
        indices[slot] = row[name]
    end
    return indices
end

# Move the CSV reader forward to the row addressed by `reader.last_dimension_read`.
#
# CSV files can only be read front to back, so the target is compared with the row
# currently pending in the iterator using their ordinal positions in the file:
#   * pending row is after the target  -> error, the reader cannot rewind;
#   * pending row is the target        -> consume it;
#   * pending row is before the target -> consume rows until the target has been consumed.
#
# Raises an error when the iterator is already exhausted, or when the file ends
# before the target row is reached.
function _quiver_goto!(reader::Quiver.Reader{csv})
    if reader.reader.next === nothing
        error("No more data to read")
    end

    dimension_in_iterator = _current_dimension_in_iterator(reader)
    dimension_to_read = reader.last_dimension_read

    order_of_dimension_in_iterator =
        _calculate_order_in_file(reader.metadata, dimension_in_iterator...)
    order_of_dimension_to_read =
        _calculate_order_in_file(reader.metadata, dimension_to_read...)

    if order_of_dimension_in_iterator > order_of_dimension_to_read
        error("Cannot read a dimension that is posterior to the current dimension")
    elseif order_of_dimension_in_iterator == order_of_dimension_to_read
        (row, _) = reader.reader.next
        is_first_index = true
        for (i, dim) in enumerate(reader.metadata.dimensions)
            reader.last_dimension_read[i] = row[dim]
            is_first_index = is_first_index && row[dim] == 1
        end

        for (i, ts) in enumerate(reader.metadata.labels)
            if is_first_index
                reader.all_labels_data_cache[i] = row[Symbol(ts)]
            else
                reader.all_labels_data_cache[i] = NaN
            end
        end

        # NOTE(review): only the tail of `_quiver_next_dimension!` is visible here; it
        # appears to refill the label cache from the pending row and advance the iterator.
        _quiver_next_dimension!(reader)
        return nothing
    end

    # The pending row is before the target: skip forward.
    # NOTE(review): if the target row is absent from the file, the loop stops at the
    # first row past it and the cache keeps the last row consumed - confirm intent.
    while order_of_dimension_in_iterator <= order_of_dimension_to_read
        reached_target = order_of_dimension_in_iterator == order_of_dimension_to_read
        _quiver_next_dimension!(reader)
        # Stop as soon as the target row has been consumed. Peeking at the following
        # row would raise "No more data to read" when the target is the last row.
        reached_target && break
        dimension_in_iterator = _current_dimension_in_iterator(reader)
        order_of_dimension_in_iterator =
            _calculate_order_in_file(reader.metadata, dimension_in_iterator...)
    end
    return nothing
end

Expand Down
84 changes: 84 additions & 0 deletions src/merge.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""
    merge(
        output_filename::String,
        filenames::Vector{String},
        impl::Type{<:Implementation};
        digits::Union{Int, Nothing} = nothing,
    )

Merge the labels of several Quiver files into a single file written to `output_filename`.

All input files must agree on dimensions, dimension sizes, time dimension, initial date
and unit, and no label may appear more than once across the inputs. Every violation
found is collected and reported together in a single `ArgumentError`.

For each combination of dimension indices the data of every input is concatenated in the
order of `filenames`. Rows in which every value is `NaN` are not written. When `digits`
is an integer the values are rounded to that many digits before being written.

All readers, and the writer if it was created, are closed even when an error occurs.
"""
function merge(
    output_filename::String,
    filenames::Vector{String},
    impl::Type{<:Implementation};
    digits::Union{Int, Nothing} = nothing,
)
    if isempty(filenames)
        throw(ArgumentError("Merge requires at least one input file."))
    end

    readers = [Quiver.Reader{impl}(filename) for filename in filenames]
    writer = nothing
    try
        reference = first(readers)
        metadata = reference.metadata
        labels = String[]

        # Collect every inconsistency so the caller sees all of them at once.
        errors = String[]
        report!(text) = push!(errors, "[Error $(length(errors) + 1)] $text\n\n")

        for reader in readers
            if metadata.dimensions != reader.metadata.dimensions
                report!(
                    "Dimensions are different. Dimensions in file $(reference.filename) is " *
                    "$(metadata.dimensions) and in file $(reader.filename) is " *
                    "$(reader.metadata.dimensions).",
                )
            end
            if metadata.dimension_size != reader.metadata.dimension_size
                report!(
                    "Dimension sizes are different. Dimension size in file " *
                    "$(reference.filename) is $(metadata.dimension_size) and in file " *
                    "$(reader.filename) is $(reader.metadata.dimension_size).",
                )
            end
            if metadata.time_dimension != reader.metadata.time_dimension
                report!(
                    "Time dimensions are different. Time dimension in file " *
                    "$(reference.filename) is $(metadata.time_dimension) and in file " *
                    "$(reader.filename) is $(reader.metadata.time_dimension).",
                )
            end
            if metadata.initial_date != reader.metadata.initial_date
                report!(
                    "Initial dates are different. Initial date in file " *
                    "$(reference.filename) is $(metadata.initial_date) and in file " *
                    "$(reader.filename) is $(reader.metadata.initial_date).",
                )
            end
            if metadata.unit != reader.metadata.unit
                report!(
                    "Units are different. Unit in file $(reference.filename) is " *
                    "$(metadata.unit) and in file $(reader.filename) is " *
                    "$(reader.metadata.unit).",
                )
            end
            current_labels = reader.metadata.labels
            for label in current_labels
                if label in labels
                    # Name the file that actually carries the repeated label.
                    report!(
                        "Label $(label) in file $(reader.filename) is already in the " *
                        "merged labels.",
                    )
                end
            end
            append!(labels, current_labels)
        end

        if !isempty(errors)
            throw(ArgumentError("Merge has $(length(errors)) errors.\n\n$(join(errors))"))
        end

        writer = Quiver.Writer{impl}(
            output_filename;
            labels = labels,
            dimensions = string.(metadata.dimensions),
            time_dimension = string(metadata.time_dimension),
            dimension_size = metadata.dimension_size,
            initial_date = metadata.initial_date,
            unit = metadata.unit,
        )

        # Slot of each reader inside the merged row, computed once up front.
        label_counts = [length(reader.metadata.labels) for reader in readers]
        last_idx = cumsum(label_counts)
        first_idx = last_idx .- label_counts .+ 1
        data = zeros(last(last_idx))

        # Sizes are reversed for the product and each tuple is reversed back before
        # being paired with the dimension names.
        index_ranges = [1:n for n in reverse(metadata.dimension_size)]
        for dims in Iterators.product(index_ranges...)
            dim_kwargs = OrderedDict(metadata.dimensions .=> reverse(dims))
            for (i, reader) in enumerate(readers)
                Quiver.goto!(reader; dim_kwargs...)
                data[first_idx[i]:last_idx[i]] = reader.data
            end
            # Rows without any data in any input are not written.
            all(isnan, data) && continue
            Quiver.write!(writer, round_digits(data, digits); dim_kwargs...)
        end
    finally
        for reader in readers
            Quiver.close!(reader)
        end
        writer === nothing || Quiver.close!(writer)
    end

    return nothing
end
40 changes: 40 additions & 0 deletions src/reader.jl
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,44 @@ end
# Close `reader` by delegating to the implementation-specific `_quiver_close!`.
# Whatever the delegate returns is discarded; the result is always `nothing`.
close!(reader::Reader) = (_quiver_close!(reader); nothing)

"""
file_to_array(
filename::String,
implementation::Type{I};
labels_to_read::Vector{String} = String[],
) where {I <: Implementation}

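Reads an entire file into memory and returns the tuple `(data, metadata)`. `data` is a `Float32` array whose first axis spans the labels that were read and whose remaining axes are the file's dimensions in reverse order.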
"""
function file_to_array(
    filename::String,
    implementation::Type{I};
    labels_to_read::Vector{String} = String[],
) where {I <: Implementation}
    reader = Reader{I}(
        filename;
        labels_to_read,
        carrousel = false, # carrousel does not make sense in this implementation
    )

    # Close the reader even if positioning or reading fails part-way through.
    try
        metadata = reader.metadata
        # The array stores the dimensions in reverse order of the metadata.
        dimension_names = reverse(metadata.dimensions)
        dimension_sizes = reverse(metadata.dimension_size)
        data = zeros(
            Float32,
            length(reader.labels_to_read),
            dimension_sizes...,
        )

        for dims in Iterators.product([1:n for n in dimension_sizes]...)
            dim_kwargs = OrderedDict(Symbol.(dimension_names) .=> dims)
            Quiver.goto!(reader; dim_kwargs...)
            # One value per label for this combination of dimension indices.
            data[:, dims...] = reader.data
        end

        return data, metadata
    finally
        Quiver.close!(reader)
    end
end
85 changes: 85 additions & 0 deletions src/writer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,89 @@ end
# Close `writer` by delegating to the implementation-specific `_quiver_close!`.
# Whatever the delegate returns is discarded; the result is always `nothing`.
close!(writer::Writer) = (_quiver_close!(writer); nothing)

"""
array_to_file(
filename::String,
data::Array{T, N},
implementation::Type{I};
dimensions::Vector{String},
labels::Vector{String},
time_dimension::String,
dimension_size::Vector{Int},
initial_date::Union{String, DateTime} = "",
unit::String = "",
digits::Union{Int, Nothing} = nothing,
) where {I <:Implementation, T, N}

Write a time series file in Quiver format.

Required arguments:

- `filename::String`: Path to file.
- `data::Array{T, N}`: Data to be written.
- `implementation::Type{I}`: Implementation to be used. It can be `Quiver.csv` or `Quiver.binary`.
- `dimensions::Vector{String}`: Dimensions of the data.
- `labels::Vector{String}`: Labels of the data.
- `time_dimension::String`: Name of the time dimension.
- `dimension_size::Vector{Int}`: Size of each dimension.
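- `initial_date::Union{String, DateTime}`: Initial date of the time series. Optional despite being listed here: when left as the default `""`, no initial date is passed to the writer. If a string is provided, it should be in the format "yyyy-mm-ddTHH:MM:SS".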

Optional arguments:
- `digits::Union{Int, Nothing}`: Number of digits to round the data. If nothing is provided, the data is not rounded.
- `unit::String`: Unit of the time series data.
"""
function array_to_file(
    filename::String,
    data::Array{T, N},
    implementation::Type{I};
    dimensions::Vector{String},
    labels::Vector{String},
    time_dimension::String,
    dimension_size::Vector{Int},
    initial_date::Union{String, DateTime} = "",
    unit::String = "",
    digits::Union{Int, Nothing} = nothing,
) where {I <: Implementation, T, N}
    # Only forward the optional keywords that were actually provided, so that the
    # writer falls back to its own defaults for the others.
    kwargs_dict = Dict{Symbol, Any}()
    if initial_date isa DateTime
        kwargs_dict[:initial_date] = initial_date
    elseif !isempty(initial_date)
        kwargs_dict[:initial_date] = DateTime(initial_date, "yyyy-mm-ddTHH:MM:SS")
    end
    if isempty(unit)
        @warn("No unit was provided for the time series file \"$filename\".")
    else
        kwargs_dict[:unit] = unit
    end

    writer = Quiver.Writer{implementation}(
        filename;
        dimensions,
        labels,
        time_dimension,
        dimension_size,
        kwargs_dict...,
    )

    # Close the writer even if a write fails part-way through.
    try
        # `data` is indexed as `data[:, dims...]`, with `dims` running over the
        # dimensions in reverse order of `dimensions` / `dimension_size`.
        reverse_dimensions = Symbol.(reverse(dimensions))
        for dims in Iterators.product([1:n for n in reverse(dimension_size)]...)
            dim_kwargs = OrderedDict(reverse_dimensions .=> dims)
            Quiver.write!(writer, round_digits(data[:, dims...], digits); dim_kwargs...)
        end
    finally
        Quiver.close!(writer)
    end

    return nothing
end

"""
    round_digits(vec::AbstractVector, ::Nothing)

Return `vec` itself, untouched: passing `nothing` means that no rounding was requested.
Any `AbstractVector` (plain vectors, ranges, views) is accepted.
"""
function round_digits(vec::AbstractVector, ::Nothing)
    return vec
end

"""
    round_digits(vec::AbstractVector, digits::Int)

Return a new array holding every element of `vec` rounded to `digits` digits.
Any `AbstractVector` (plain vectors, ranges, views) is accepted; `vec` is not modified.
"""
function round_digits(vec::AbstractVector, digits::Int)
    return round.(vec; digits)
end
Loading
Loading