diff --git a/Project.toml b/Project.toml index 8d791223..c2f47200 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ArviZ" uuid = "131c737c-5715-5e2e-ad31-c244f01c1dc7" authors = ["Seth Axen "] -version = "0.6.0" +version = "0.6.1" [deps] Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d" diff --git a/docs/make.jl b/docs/make.jl index e2afa583..553c12cf 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -47,8 +47,10 @@ makedocs(; "Stats" => "api/stats.md", "Diagnostics" => "api/diagnostics.md", "Data" => "api/data.md", - "InferenceData" => "api/inference_data.md", - "Dataset" => "api/dataset.md", + "InferenceObjects" => [ + "InferenceData" => "api/inference_data.md", + "Dataset" => "api/dataset.md", + ], ], ], checkdocs=:exports, diff --git a/docs/src/api/data.md b/docs/src/api/data.md index cb77ea61..8340ae20 100644 --- a/docs/src/api/data.md +++ b/docs/src/api/data.md @@ -15,7 +15,6 @@ from_samplechains ## IO / General conversion ```@docs -convert_to_inference_data from_dict from_json from_namedtuple @@ -28,7 +27,6 @@ to_netcdf ```@docs concat extract_dataset -merge ``` ## Example data diff --git a/docs/src/api/inference_data.md b/docs/src/api/inference_data.md index d5b2c0f5..f4b5ad97 100644 --- a/docs/src/api/inference_data.md +++ b/docs/src/api/inference_data.md @@ -28,3 +28,15 @@ Base.setindex `InferenceData` also implements the same iteration interface as its underlying `NamedTuple`. That is, iterating over an `InferenceData` iterates over its groups. 
# Load the InferenceObjects submodule and bring its API into ArviZ.
include("InferenceObjects/InferenceObjects.jl")

using .InferenceObjects
# Functions that ArviZ adds methods to must be `import`ed (each name only once).
import .InferenceObjects: convert_to_inference_data, namedtuple_of_arrays
# internal functions temporarily used/extended here
using .InferenceObjects:
    attributes, flatten, groupnames, groups, hasgroup, rekey, setattribute!
# `convert(Dataset, obj)` delegates to `convert_to_dataset`; a `Dataset` converts to itself.
function Base.convert(::Type{Dataset}, obj)
    return convert_to_dataset(obj)
end
function Base.convert(::Type{Dataset}, obj::Dataset)
    return obj
end

"""
    convert_to_dataset(obj; group = :posterior, kwargs...) -> Dataset

Convert a supported object to a `Dataset`.

Unless `obj` is already a `Dataset` (in which case it is returned as-is), `obj` is first
converted with [`convert_to_inference_data`](@ref), and the dataset stored under `group`
is returned.
"""
function convert_to_dataset end

function convert_to_dataset(obj; group::Symbol=:posterior, kwargs...)
    # Build the full InferenceData, then pick out the requested group.
    return getproperty(convert_to_inference_data(obj; group, kwargs...), group)
end
function convert_to_dataset(data::Dataset; kwargs...)
    return data
end
"""
    convert_to_inference_data(obj; group, kwargs...) -> InferenceData

Convert a supported object to an [`InferenceData`](@ref) object.

If `obj` converts to a single dataset, `group` specifies which dataset in the resulting
`InferenceData` that is.

See [`convert_to_dataset`](@ref).

# Arguments

  - `obj` can be many objects. Basic supported types are:

      + [`InferenceData`](@ref): return unchanged
      + [`Dataset`](@ref)/`DimensionalData.AbstractDimStack`: add to `InferenceData` as the
        only group
      + `NamedTuple`/`AbstractDict`: create a `Dataset` as the only group
      + `AbstractArray{<:Real}`: create a `Dataset` as the only group, given an arbitrary
        name, if the name is not set

More specific types may be documented separately.

# Keywords

  - `group::Symbol = :posterior`: If `obj` converts to a single dataset, assign the resulting
    dataset to this group.

  - `dims`: a collection mapping variable names to collections of objects containing
    dimension names. Acceptable such objects are:

      + `Symbol`: dimension name
      + `Type{<:DimensionalData.Dimension}`: dimension type
      + `DimensionalData.Dimension`: dimension, potentially with indices
      + `Nothing`: no dimension name provided, dimension name is automatically generated
  - `coords`: a collection indexable by dimension name specifying the indices of the given
    dimension. If indices for a dimension in `dims` are provided, they are used even if
    the dimension contains its own indices. If a dimension is missing, its indices are
    automatically generated.
  - `kwargs`: remaining keywords forwarded to converter functions
"""
function convert_to_inference_data end

# Already an InferenceData: nothing to do.
convert_to_inference_data(data::InferenceData; kwargs...) = data
# Any other dim stack is first wrapped in a Dataset.
function convert_to_inference_data(stack::DimensionalData.AbstractDimStack; kwargs...)
    return convert_to_inference_data(Dataset(stack); kwargs...)
end
# A single Dataset becomes the only group, stored under `group`.
function convert_to_inference_data(data::Dataset; group=:posterior, kwargs...)
    return convert_to_inference_data(InferenceData(; group => data); kwargs...)
end
# Symbol-keyed dictionaries are handled through the NamedTuple method.
function convert_to_inference_data(data::AbstractDict{Symbol}; kwargs...)
    return convert_to_inference_data(NamedTuple(data); kwargs...)
end
# A bare array becomes a one-variable NamedTuple, named via `default_var_name`.
function convert_to_inference_data(var_data::AbstractArray{<:Real}; kwargs...)
    data = (; default_var_name(var_data) => var_data)
    return convert_to_inference_data(data; kwargs...)
end
# NamedTuple of real arrays: build the Dataset (consuming `kwargs`), then store it
# under `group`.
function convert_to_inference_data(
    data::NamedTuple{<:Any,<:Tuple{Vararg{AbstractArray{<:Real}}}};
    group=:posterior,
    kwargs...,
)
    ds = namedtuple_to_dataset(data; kwargs...)
    return convert_to_inference_data(ds; group)
end

"""
    default_var_name(data) -> Symbol

Return the default name for the variable whose values are stored in `data`.

The fallback name is `:x`. For a `DimensionalData.AbstractDimArray`, its own name is used
when it is a `Symbol` or a non-empty string; otherwise the default name of its parent array
is returned.
"""
default_var_name(data) = :x
function default_var_name(data::DimensionalData.AbstractDimArray)
    name = DimensionalData.name(data)
    name isa Symbol && return name
    name isa AbstractString && !isempty(name) && return Symbol(name)
    return default_var_name(parent(data))
end
"""
    namedtuple_to_dataset(data; kwargs...) -> Dataset

Convert `NamedTuple` mapping variable names to arrays to a [`Dataset`](@ref).

# Keywords

  - `attrs`: a Symbol-indexable collection of metadata to attach to the dataset, in addition
    to defaults. Values should be JSON serializable.

  - `library::Union{String,Module}`: library used for performing inference. Will be attached
    to the `attrs` metadata.
  - `dims`: a collection mapping variable names to collections of objects containing dimension
    names. Acceptable such objects are:

      + `Symbol`: dimension name
      + `Type{<:DimensionalData.Dimension}`: dimension type
      + `DimensionalData.Dimension`: dimension, potentially with indices
      + `Nothing`: no dimension name provided, dimension name is automatically generated
  - `coords`: a collection indexable by dimension name specifying the indices of the given
    dimension. If indices for a dimension in `dims` are provided, they are used even if
    the dimension contains its own indices. If a dimension is missing, its indices are
    automatically generated.
  - `default_dims`: dimensions forwarded to `array_to_dimarray` for every variable.
    Defaults to `DEFAULT_SAMPLE_DIMS`.
"""
function namedtuple_to_dataset end
function namedtuple_to_dataset(
    data; attrs=(;), library=nothing, dims=(;), coords=(;), default_dims=DEFAULT_SAMPLE_DIMS
)
    # Wrap each variable in a DimArray; variables without an entry in `dims` get `()`.
    dim_arrays = map(keys(data)) do var_name
        var_data = data[var_name]
        var_dims = get(dims, var_name, ())
        return array_to_dimarray(var_data, var_name; dims=var_dims, coords, default_dims)
    end
    # User-provided attributes take precedence over the defaults. The local is named
    # `all_attrs` so that it does not shadow the module-level `attributes` function.
    all_attrs = merge(default_attributes(library), attrs)
    metadata = OrderedDict{Symbol,Any}(pairs(all_attrs))
    return Dataset(dim_arrays...; metadata)
end
"""
    default_attributes(library=nothing) -> NamedTuple

Build the default metadata attributes for a dataset produced by inference library `library`.

The result always has a `created_at` timestamp, followed by the entries returned by
`library_attributes(library)`.
"""
function default_attributes(library=nothing)
    timestamp = Dates.format(Dates.now(), Dates.ISODateTimeFormat)
    return merge((; created_at=timestamp), library_attributes(library))
end

# Attributes describing the inference library. No library means no attributes; a `Module`
# additionally records its package version.
library_attributes(::Nothing) = NamedTuple()
function library_attributes(library)
    return (; inference_library=string(library))
end
function library_attributes(library::Module)
    version = package_version(library)
    return (;
        inference_library=string(library), inference_library_version=string(version)
    )
end
"""
    generate_dims(array, name; dims, coords, default_dims)

Generate `DimensionalData.Dimension` objects for each dimension of `array`.

`name` indicates the name of the variable represented by array.

# Keywords

  - `dims`: A collection of objects indicating dimension names. If any dimensions are not
    provided, their names are automatically generated as `Symbol("\$(name)_dim_\$(i)")`.
    Entries beyond the number of non-default dimensions of `array` are ignored, and an
    error is logged. Acceptable types of entries are:

      + `Symbol`: dimension name
      + `Type{<:DimensionalData.Dimension}`: dimension type
      + `DimensionalData.Dimension`: dimension, potentially with indices
      + `Nothing`: no dimension name provided, dimension name is automatically generated

  - `coords`: a collection indexable by dimension name specifying the indices of the given
    dimension. If indices for a dimension in `dims` are provided, they are used even if
    the dimension contains its own indices. If a dimension is missing, its indices are
    automatically generated.
  - `default_dims`: A collection of dims to be prepended to `dims` whose elements have the
    same constraints.
"""
function generate_dims(array, name; dims=(), coords=(;), default_dims=())
    num_default_dims = length(default_dims)
    num_provided_dims = num_default_dims + length(dims)
    if num_provided_dims > ndims(array)
        # Logged rather than thrown so existing callers keep working; the surplus
        # entries of `dims` are simply never read below.
        msg =
            "Variable `$(name)`: $(num_provided_dims) dimensions were provided " *
            "(including $(num_default_dims) default dimensions), but the array only has " *
            "$(ndims(array)) dimensions. Extra entries of `dims` are ignored."
        @error msg
    end
    # Names for the non-default dimensions: use `dims[i]` when given, else generate one.
    dims_named = ntuple(ndims(array) - num_default_dims) do i
        dim = get(dims, i, nothing)
        dim === nothing && return Symbol("$(name)_dim_$(i)")
        return dim
    end
    dims_all = (default_dims..., dims_named...)
    axes_all = axes(array)
    T = NTuple{ndims(array),Dimensions.Dimension}
    # `Ref(coords)` keeps `coords` from being broadcast over.
    dims_with_coords = as_dimension.(dims_all, Ref(coords), axes_all)::T
    return Dimensions.format(dims_with_coords, array)::T
end
"""
    index_to_indices(index)

Promote `index` so that indexing with the result slices instead of dropping a dimension.

An `Int` is wrapped in a one-element vector and a `LookupArrays.Selector` is wrapped in
`AsSlice`; anything else is returned unchanged.
"""
function index_to_indices(i)
    return i
end
function index_to_indices(i::Int)
    return [i]
end
function index_to_indices(sel::LookupArrays.Selector)
    return AsSlice(sel)
end
- + `::Matrix{<:NamedTuple}`: Each element is a single draw from a single chain, with + + `::Matrix{<:NamedTuple}`: Each element is a single draw from a single chain, with array/scalar values with dimensions `sizes`. The dimensions of the matrix container are `(nchains, ndraws)` - + `::Vector{Vector{<:NamedTuple}}`: The same as the above case. + + `::Vector{Vector{<:NamedTuple}}`: The same as the above case. # Keywords -- `posterior_predictive::Any=nothing`: Draws from the posterior predictive distribution -- `sample_stats::Any=nothing`: Statistics of the posterior sampling process -- `predictions::Any=nothing`: Out-of-sample predictions for the posterior. -- `prior::Any=nothing`: Draws from the prior -- `prior_predictive::Any=nothing`: Draws from the prior predictive distribution -- `sample_stats_prior::Any=nothing`: Statistics of the prior sampling process -- `observed_data::NamedTuple`: Observed data on which the `posterior` is - conditional. It should only contain data which is modeled as a random variable. Keys - are parameter names and values. -- `constant_data::NamedTuple`: Model constants, data included in the model - which is not modeled as a random variable. Keys are parameter names and values. -- `predictions_constant_data::NamedTuple`: Constants relevant to the model - predictions (i.e. new `x` values in a linear regression). -- `log_likelihood`: Pointwise log-likelihood for the data. It is recommended - to use this argument as a `NamedTuple` whose keys are observed variable names and whose - values are log likelihood arrays. -- `library`: Name of library that generated the draws -- `coords`: Map from named dimension to named indices -- `dims`: Map from variable name to names of its dimensions + - `posterior_predictive::Any=nothing`: Draws from the posterior predictive distribution + - `sample_stats::Any=nothing`: Statistics of the posterior sampling process + - `predictions::Any=nothing`: Out-of-sample predictions for the posterior. 
+ - `prior::Any=nothing`: Draws from the prior + - `prior_predictive::Any=nothing`: Draws from the prior predictive distribution + - `sample_stats_prior::Any=nothing`: Statistics of the prior sampling process + - `observed_data::NamedTuple`: Observed data on which the `posterior` is + conditional. It should only contain data which is modeled as a random variable. Keys + are parameter names and values. + - `constant_data::NamedTuple`: Model constants, data included in the model + which is not modeled as a random variable. Keys are parameter names and values. + - `predictions_constant_data::NamedTuple`: Constants relevant to the model + predictions (i.e. new `x` values in a linear regression). + - `log_likelihood`: Pointwise log-likelihood for the data. It is recommended + to use this argument as a `NamedTuple` whose keys are observed variable names and whose + values are log likelihood arrays. + - `library`: Name of library that generated the draws + - `coords`: Map from named dimension to named indices + - `dims`: Map from variable name to names of its dimensions # Returns -- `InferenceData`: The data with groups corresponding to the provided data + - `InferenceData`: The data with groups corresponding to the provided data # Examples @@ -86,19 +61,17 @@ using ArviZ nchains, ndraws = 2, 10 data1 = ( - x = rand(nchains, ndraws), - y = randn(nchains, ndraws, 2), - z = randn(nchains, ndraws, 3, 2), + x=rand(nchains, ndraws), y=randn(nchains, ndraws, 2), z=randn(nchains, ndraws, 3, 2) ) idata1 = from_namedtuple(data1) -data2 = [(x = rand(ndraws), y = randn(ndraws, 2), z = randn(ndraws, 3, 2)) for _ = 1:nchains]; +data2 = [(x=rand(ndraws), y=randn(ndraws, 2), z=randn(ndraws, 3, 2)) for _ in 1:nchains]; idata2 = from_namedtuple(data2) -data3 = [(x = rand(), y = randn(2), z = randn(3, 2)) for _ = 1:nchains, _ = 1:ndraws]; +data3 = [(x=rand(), y=randn(2), z=randn(3, 2)) for _ in 1:nchains, _ in 1:ndraws]; idata3 = from_namedtuple(data3) -data4 = [[(x = rand(), y = randn(2), 
z = randn(3, 2)) for _ = 1:ndraws] for _ = 1:nchains]; +data4 = [[(x=rand(), y=randn(2), z=randn(3, 2)) for _ in 1:ndraws] for _ in 1:nchains]; idata4 = from_namedtuple(data4) ``` """ diff --git a/src/inference_data.jl b/src/InferenceObjects/inference_data.jl similarity index 70% rename from src/inference_data.jl rename to src/InferenceObjects/inference_data.jl index 7d9c4f01..5488d285 100644 --- a/src/inference_data.jl +++ b/src/InferenceObjects/inference_data.jl @@ -3,7 +3,7 @@ Container for inference data storage using DimensionalData. -This object implements the [InferenceData schema](https://python.arviz.org/en/v$(arviz_version())/schema/schema.html). +This object implements the [InferenceData schema](https://python.arviz.org/en/latest/schema/schema.html). Internally, groups are stored in a `NamedTuple`, which can be accessed using `parent(::InferenceData)`. @@ -35,18 +35,6 @@ InferenceData(data::InferenceData) = data Base.parent(data::InferenceData) = getfield(data, :groups) -""" - convert(::Type{InferenceData}, obj) - -Convert `obj` to an `InferenceData`. - -`obj` can be any type for which [`convert_to_inference_data`](@ref) is defined. -""" -Base.convert(::Type{InferenceData}, obj) = convert_to_inference_data(obj) -Base.convert(::Type{InferenceData}, obj::InferenceData) = obj -Base.convert(::Type{NamedTuple}, data::InferenceData) = NamedTuple(data) -NamedTuple(data::InferenceData) = parent(data) - # these 3 interfaces ensure InferenceData behaves like a NamedTuple # properties interface @@ -80,51 +68,66 @@ exception will be raised. # Examples -Select data from all groups for just the specified schools. +Select data from all groups for just the specified id values. 
```@repl getindex julia> using ArviZ, DimensionalData -julia> idata = load_arviz_data("centered_eight"); +julia> idata = from_namedtuple( + (θ=randn(4, 100, 4), τ=randn(4, 100)); + prior=(θ=randn(4, 100, 4), τ=randn(4, 100)), + observed_data=(y=randn(4),), + dims=(θ=[:id], y=[:id]), + coords=(id=["a", "b", "c", "d"],), + ) +InferenceData with groups: + > posterior + > prior + > observed_data + +julia> idata.posterior +Dataset with dimensions: + Dim{:chain} Sampled 1:4 ForwardOrdered Regular Points, + Dim{:draw} Sampled 1:100 ForwardOrdered Regular Points, + Dim{:id} Categorical String[a, b, c, d] ForwardOrdered +and 2 layers: + :θ Float64 dims: Dim{:chain}, Dim{:draw}, Dim{:id} (4×100×4) + :τ Float64 dims: Dim{:chain}, Dim{:draw} (4×100) + +with metadata OrderedCollections.OrderedDict{Symbol, Any} with 1 entry: + :created_at => "2022-08-11T11:15:21.4" -julia> idata_sel = idata[school=At(["Choate", "Deerfield"])] +julia> idata_sel = idata[id=At(["a", "b"])] InferenceData with groups: > posterior - > posterior_predictive - > sample_stats > prior > observed_data julia> idata_sel.posterior Dataset with dimensions: - Dim{:chain} Sampled 0:3 ForwardOrdered Regular Points, - Dim{:draw} Sampled 0:499 ForwardOrdered Regular Points, - Dim{:school} Categorical String[Choate, Deerfield] Unordered -and 3 layers: - :mu Float64 dims: Dim{:chain}, Dim{:draw} (4×500) - :theta Float64 dims: Dim{:chain}, Dim{:draw}, Dim{:school} (4×500×2) - :tau Float64 dims: Dim{:chain}, Dim{:draw} (4×500) - -with metadata OrderedCollections.OrderedDict{Symbol, Any} with 3 entries: - :created_at => "2019-06-21T17:36:34.398087" - :inference_library_version => "3.7" - :inference_library => "pymc3" + Dim{:chain} Sampled 1:4 ForwardOrdered Regular Points, + Dim{:draw} Sampled 1:100 ForwardOrdered Regular Points, + Dim{:id} Categorical String[a, b] ForwardOrdered +and 2 layers: + :θ Float64 dims: Dim{:chain}, Dim{:draw}, Dim{:id} (4×100×2) + :τ Float64 dims: Dim{:chain}, Dim{:draw} (4×100) + +with 
metadata OrderedCollections.OrderedDict{Symbol, Any} with 1 entry: + :created_at => "2022-08-11T11:15:21.4" ``` Select data from just the posterior, returning a `Dataset` if the indices index more than one element from any of the variables: ```@repl getindex -julia> idata[:observed_data, school=At(["Choate"])] +julia> idata[:observed_data, id=At(["a"])] Dataset with dimensions: - Dim{:school} Categorical String[Choate] Unordered + Dim{:id} Categorical String[a] ForwardOrdered and 1 layer: - :obs Float64 dims: Dim{:school} (1) + :y Float64 dims: Dim{:id} (1) -with metadata OrderedCollections.OrderedDict{Symbol, Any} with 3 entries: - :created_at => "2019-06-21T17:36:34.491909" - :inference_library_version => "3.7" - :inference_library => "pymc3" +with metadata OrderedCollections.OrderedDict{Symbol, Any} with 1 entry: + :created_at => "2022-08-11T11:19:25.982" ``` Note that if a single index is provided, the behavior is still to slice so that the @@ -151,7 +154,7 @@ function Base.getindex(data::InferenceData; kwargs...) # will be a `Dataset` if the group has other dimensions or `NamedTuple` # if it has no other dimensions. # So we promote to an array of indices - new_kwargs = map(_index_to_indices, NamedTuple(kwargs)) + new_kwargs = map(index_to_indices, NamedTuple(kwargs)) groups = map(parent(data)) do ds return getindex(ds; new_kwargs...) end @@ -220,12 +223,30 @@ Return `true` if a group with name `name` is stored in `data`. """ hasgroup(data::InferenceData, name::Symbol) = haskey(data, name) -_index_to_indices(i) = i -_index_to_indices(i::Int) = [i] -_index_to_indices(sel::Dimensions.Selector) = AsSlice(sel) - @generated function _reorder_group_names(::Val{names}) where {names} - return Tuple(sort(collect(names); by=k -> SUPPORTED_GROUPS_DICT[k])) + lt = (a, b) -> (a isa Integer && b isa Integer) ? 
"""
    flatten(x)

If `x` is an array of arrays, flatten into a single array whose dimensions are ordered with
dimensions of the outermost container first and innermost container last.

An array of numbers is converted to an `Array`; any other object is returned unchanged.
An empty array of arrays yields an empty array whose trailing (inner) dimensions have
length 0.
"""
flatten(x) = x
flatten(x::AbstractArray{<:Number}) = convert(Array, x)
function flatten(x::AbstractArray{S}) where {T<:Number,N,S<:AbstractArray{T,N}}
    # `first(x)` instead of `x[1]`; with no elements there is no inner size to read, so
    # the inner dimensions are given length 0.
    inner_size = isempty(x) ? ntuple(_ -> 0, Val(N)) : size(first(x))
    ret = Array{T}(undef, (size(x)..., inner_size...))
    inner_colons = ntuple(_ -> Colon(), Val(N))
    for k in keys(x)
        ret[k, inner_colons...] = x[k]
    end
    return ret
end

"""
    namedtuple_of_arrays(x::NamedTuple) -> NamedTuple
    namedtuple_of_arrays(x::AbstractArray{NamedTuple}) -> NamedTuple
    namedtuple_of_arrays(x::AbstractArray{AbstractArray{<:NamedTuple}}) -> NamedTuple

Given a container of `NamedTuple`s, concatenate them, using the container dimensions as the
dimensions of the resulting arrays.

# Examples

```@example
using ArviZ
nchains, ndraws = 4, 100
data = [(x=rand(), y=randn(2), z=randn(2, 3)) for _ in 1:nchains, _ in 1:ndraws];
ntarray = ArviZ.namedtuple_of_arrays(data);
```
"""
function namedtuple_of_arrays end
namedtuple_of_arrays(x::NamedTuple) = map(flatten, x)
# Nested containers: convert every element first, then combine the resulting NamedTuples.
namedtuple_of_arrays(x::AbstractArray) = namedtuple_of_arrays(namedtuple_of_arrays.(x))
function namedtuple_of_arrays(x::AbstractArray{<:NamedTuple{K}}) where {K}
    # For every key, gather that field across the container and flatten it.
    return mapreduce(merge, K) do k
        v = flatten.(getproperty.(x, k))
        return (; k => flatten(v))
    end
end

"""
    package_version(pkg::Module) -> Union{Nothing,VersionNumber}

Return version number of package `pkg`.

If no version can be determined for `pkg` (e.g. it is a submodule, or its `Project.toml`
is missing or has no `version` entry), then `nothing` is returned.
"""
function package_version(pkg::Module)
    @static if isdefined(Base, :pkgversion)
        return Base.pkgversion(pkg)
    else
        # Fallback when `Base.pkgversion` is unavailable: read the version from the
        # package's Project.toml.
        pkg_path = pathof(pkg)
        pkg_path === nothing && return nothing
        project = joinpath(dirname(dirname(pkg_path)), "Project.toml")
        isfile(project) || return nothing
        toml = read(project, String)
        # Any amount of whitespace is allowed around `=`.
        m = match(r"(*ANYCRLF)^version\s*=\s*\"(.*)\"$"m, toml)
        m === nothing && return nothing
        return VersionNumber(m[1])
    end
end

"""
    rekey(collection, keymap) -> rekeyed_collection

Return a new collection where values for specific keys have been moved to other keys.

`keymap` must be a keyed collection mapping from keys of the same type as `collection` to
other keys of the same type.

Keys present in `keymap` but absent from `collection` are ignored.
"""
rekey(d, keymap) = Dict(get(keymap, k, k) => d[k] for k in keys(d))
function rekey(d::NamedTuple, keymap)
    new_keys = map(k -> get(keymap, k, k), keys(d))
    return NamedTuple{new_keys}(values(d))
end
= convert(Dataset, result) -""" - convert_to_inference_data(obj; group, kwargs...) -> InferenceData - -Convert a supported object to an [`InferenceData`](@ref) object. - -If `obj` converts to a single dataset, `group` specifies which dataset in the resulting -`InferenceData` that is. - -See [`convert_to_dataset`](@ref) - -# Arguments - - - `obj` can be many objects. Basic supported types are: - - + [`InferenceData`](@ref): return unchanged - + `AbstractString`: attempt to load a NetCDF file from disk - + [`Dataset`](@ref)/`DimensionalData.AbstractDimStack`: add to `InferenceData` as the only - group - + `NamedTuple`/`AbstractDict`: create a `Dataset` as the only group - + `AbstractArray{<:Real}`: create a `Dataset` as the only group, given an arbitrary - name, if the name is not set - + `PyCall.PyObject`: forward object to Python ArviZ for conversion - -More specific types are documented separately. - -# Keywords - - - `group::Symbol = :posterior`: If `obj` converts to a single dataset, assign the resulting - dataset to this group. - - - `dims`: a collection mapping variable names to collections of objects containing dimension names. Acceptable such objects are: - - + `Symbol`: dimension name - + `Type{<:DimensionsionalData.Dimension}`: dimension type - + `DimensionsionalData.Dimension`: dimension, potentially with indices - + `Nothing`: no dimension name provided, dimension name is automatically generated - - `coords`: a collection indexable by dimension name specifying the indices of the given - dimension. If indices for a dimension in `dims` are provided, they are used even if - the dimension contains its own indices. If a dimension is missing, its indices are - automatically generated. - - `kwargs`: remaining keywords forwarded to converter functions -""" -function convert_to_inference_data end - -convert_to_inference_data(data::InferenceData; kwargs...) = data -function convert_to_inference_data(stack::DimensionalData.AbstractDimStack; kwargs...) 
- return convert_to_inference_data(Dataset(stack); kwargs...) -end -function convert_to_inference_data(data::Dataset; group=:posterior, kwargs...) - return convert_to_inference_data(InferenceData(; group => data); kwargs...) -end -function convert_to_inference_data(data::AbstractDict{Symbol}; kwargs...) - return convert_to_inference_data(NamedTuple(data); kwargs...) -end -function convert_to_inference_data(var_data::AbstractArray{<:Real}; kwargs...) - data = (; default_var_name(var_data) => var_data) - return convert_to_inference_data(data; kwargs...) -end function convert_to_inference_data(filename::AbstractString; kwargs...) return from_netcdf(filename) end -function convert_to_inference_data( - data::NamedTuple{<:Any,<:Tuple{Vararg{AbstractArray{<:Real}}}}; - group=:posterior, - kwargs..., -) - ds = namedtuple_to_dataset(data; kwargs...) - return convert_to_inference_data(ds; group) -end - -""" - default_var_name(data) -> Symbol - -Return the default name for the variable whose values are stored in `data`. -""" -default_var_name(data) = :x -function default_var_name(data::DimensionalData.AbstractDimArray) - name = DimensionalData.name(data) - name isa Symbol && return name - name isa AbstractString && !isempty(name) && return Symbol(name) - return default_var_name(parent(data)) -end - -function convert_to_dataset(data::InferenceData; group::Symbol=:posterior, kwargs...) - return getproperty(data, group) -end @forwardfun load_arviz_data @@ -109,46 +26,3 @@ function concat(data::InferenceData...; kwargs...) end Docs.getdoc(::typeof(concat)) = forwardgetdoc(:concat) - -""" - merge(data::InferenceData, others::InferenceData...) -> InferenceData - -Merge [`InferenceData`](@ref) objects. - -The result contains all groups in `data` and `others`. -If a group appears more than once, the one that occurs first is kept. - -See [`concat`](@ref) -""" -function Base.merge(data::InferenceData, others::InferenceData...) 
- return InferenceData(Base.merge(groups(data), map(groups, others)...)) -end - -function rekey(data::InferenceData, keymap) - groups_old = groups(data) - names_new = map(k -> get(keymap, k, k), propertynames(groups_old)) - groups_new = NamedTuple{names_new}(Tuple(groups_old)) - return InferenceData(groups_new) -end - -# python interop - -function PyObject(data::InferenceData) - return pycall(arviz.InferenceData, PyObject; map(PyObject, groups(data))...) -end - -function convert_to_inference_data(obj::PyObject; dims=nothing, coords=nothing, kwargs...) - if pyisinstance(obj, arviz.InferenceData) - group_names = obj.groups() - groups = ( - Symbol(name) => convert(Dataset, getindex(obj, name)) for name in group_names - ) - return InferenceData(; groups...) - else - # Python ArviZ requires dims and coords be dicts matching to vectors - pydims = dims === nothing ? dims : Dict(k -> collect(dims[k]) for k in keys(dims)) - pycoords = - dims === nothing ? dims : Dict(k -> collect(coords[k]) for k in keys(coords)) - return arviz.convert_to_inference_data(obj; dims=pydims, coords=pycoords, kwargs...) - end -end diff --git a/src/dataset.jl b/src/dataset.jl deleted file mode 100644 index 6e6d0b51..00000000 --- a/src/dataset.jl +++ /dev/null @@ -1,254 +0,0 @@ -""" - Dataset{L} <: DimensionalData.AbstractDimStack{L} - -Container of dimensional arrays sharing some dimensions. - -This type is an -[`DimensionalData.AbstractDimStack`](https://rafaqz.github.io/DimensionalData.jl/stable/api/#DimensionalData.AbstractDimStack) -that implements the same interface as `DimensionalData.DimStack` and has identical usage. - -When a `Dataset` is passed to Python, it is converted to an `xarray.Dataset` without copying -the data. That is, the Python object shares the same memory as the Julia object. However, -if an `xarray.Dataset` is passed to Julia, its data must be copied. - -# Constructors - - Dataset(data::DimensionalData.AbstractDimArray...) 
- Dataset(data::Tuple{Vararg{<:DimensionalData.AbstractDimArray}}) - Dataset(data::NamedTuple{Keys,Vararg{<:DimensionalData.AbstractDimArray}}) - Dataset( - data::NamedTuple, - dims::Tuple{Vararg{DimensionalData.Dimension}}; - metadata=DimensionalData.NoMetadata(), - ) - -In most cases, use [`convert_to_dataset`](@ref) to create a `Dataset` instead of directly -using a constructor. -""" -struct Dataset{L,D<:DimensionalData.AbstractDimStack{L}} <: - DimensionalData.AbstractDimStack{L} - data::D -end - -Dataset(args...; kwargs...) = Dataset(DimensionalData.DimStack(args...; kwargs...)) -Dataset(data::Dataset) = data - -Base.parent(data::Dataset) = getfield(data, :data) - -Base.propertynames(data::Dataset) = keys(data) - -Base.getproperty(data::Dataset, k::Symbol) = getindex(data, k) - -function setattribute!(data::Dataset, k::Symbol, value) - setindex!(DimensionalData.metadata(data), value, k) - return value -end -@deprecate setattribute!(data::Dataset, k::AbstractString, value) setattribute!( - data, Symbol(k), value -) false - -Base.convert(::Type{Dataset}, obj::Dataset) = obj -Base.convert(::Type{Dataset}, obj) = convert_to_dataset(obj) - -@doc doc""" - convert_to_dataset(obj; group = :posterior, kwargs...) -> Dataset - -Convert a supported object to a `Dataset`. - -In most cases, this function calls [`convert_to_inference_data`](@ref) and returns the -corresponding `group`. -""" -function convert_to_dataset end - -function convert_to_dataset(obj; group::Symbol=:posterior, kwargs...) - idata = convert_to_inference_data(obj; group, kwargs...) - dataset = getproperty(idata, group) - return dataset -end -convert_to_dataset(data::Dataset; kwargs...) = data - -""" - namedtuple_to_dataset(data; kwargs...) -> Dataset - -Convert `NamedTuple` mapping variable names to arrays to a [`Dataset`](@ref). - -# Keywords - - - `attrs`: a Symbol-indexable collection of metadata to attach to the dataset, in addition - to defaults. Values should be JSON serializable. 
- - - `library::Union{String,Module}`: library used for performing inference. Will be attached - to the `attrs` metadata. - - `dims`: a collection mapping variable names to collections of objects containing dimension - names. Acceptable such objects are: - - + `Symbol`: dimension name - + `Type{<:DimensionsionalData.Dimension}`: dimension type - + `DimensionsionalData.Dimension`: dimension, potentially with indices - + `Nothing`: no dimension name provided, dimension name is automatically generated - - `coords`: a collection indexable by dimension name specifying the indices of the given - dimension. If indices for a dimension in `dims` are provided, they are used even if - the dimension contains its own indices. If a dimension is missing, its indices are - automatically generated. -""" -function namedtuple_to_dataset end -function namedtuple_to_dataset( - data; attrs=(;), library=nothing, dims=(;), coords=(;), default_dims=DEFAULT_SAMPLE_DIMS -) - dim_arrays = map(keys(data)) do var_name - var_data = data[var_name] - var_dims = get(dims, var_name, ()) - return array_to_dimarray(var_data, var_name; dims=var_dims, coords, default_dims) - end - attributes = merge(default_attributes(library), attrs) - metadata = OrderedDict{Symbol,Any}(pairs(attributes)) - return Dataset(dim_arrays...; metadata) -end - -""" - array_to_dimarray(array, name; kwargs...) -> DimensionalData.AbstractDimArray - -Convert `array` to a `AbstractDimArray` with name `name`. - -If `array` is already an `AbstractDimArray`, then it is returned without modification. -See [`generate_dims`](@ref) for a description of `kwargs`. -""" -function array_to_dimarray end -function array_to_dimarray(array::DimensionalData.AbstractDimArray, name; kwargs...) 
- return DimensionalData.rebuild(array; name) -end -function array_to_dimarray(data, name; dims=(), coords=(;), default_dims=()) - array = if ndims(data) < 2 && has_all_sample_dims(default_dims) - reshape(data, 1, :) - else - data - end - array_dims = generate_dims(array, name; dims, coords, default_dims) - return DimensionalData.DimArray(array, array_dims; name) -end - -has_all_sample_dims(dims) = all(Dimensions.hasdim(dims, DEFAULT_SAMPLE_DIMS)) - -""" - default_attributes(library=nothing) -> NamedTuple - -Generate default attributes metadata for a dataset generated by inference library `library`. - -`library` may be a `String` or a `Module`. -""" -function default_attributes(library=nothing) - return ( - created_at=Dates.format(Dates.now(), Dates.ISODateTimeFormat), - arviz_version=string(package_version(ArviZ)), - arviz_language="julia", - library_attributes(library)..., - ) -end - -library_attributes(library) = (; inference_library=string(library)) -library_attributes(::Nothing) = (;) -function library_attributes(library::Module) - return ( - inference_library=string(library), - inference_library_version=string(package_version(library)), - ) -end - -""" - generate_dims(array, name; dims, coords, default_dims) - -Generate `DimensionsionalData.Dimension` objects for each dimension of `array`. - -`name` indicates the name of the variable represented by array. - -# Keywords - - - `dims`: A collection of objects indicating dimension names. If any dimensions are not - provided, their names are automatically generated. Acceptable types of entries are: - - + `Symbol`: dimension name - + `Type{<:DimensionsionalData.Dimension}`: dimension type - + `DimensionsionalData.Dimension`: dimension, potentially with indices - + `Nothing`: no dimension name provided, dimension name is automatically generated - - - `coords`: a collection indexable by dimension name specifying the indices of the given - dimension. 
If indices for a dimension in `dims` are provided, they are used even if - the dimension contains its own indices. If a dimension is missing, its indices are - automatically generated. - - `default_dims`: A collection of dims to be prepended to `dims` whose elements have the - same constraints. -""" -function generate_dims end -function generate_dims(array, name; dims=(), coords=(;), default_dims=()) - num_default_dims = length(default_dims) - length(dims) + num_default_dims > ndims(array) && @error "blah" - dims_named = ntuple(ndims(array) - length(default_dims)) do i - dim = get(dims, i, nothing) - dim === nothing && return Symbol("$(name)_dim_$(i)") - return dim - end - dims_all = (default_dims..., dims_named...) - dims_with_coords = ntuple(ndims(array)) do i - return as_dimension(dims_all[i], coords, size(array, i)) - end - return Dimensions.format(dims_with_coords, array) -end - -""" - as_dimension(dim, coords, length) -> DimensionsionalData.Dimension - -Convert `dim`, `coords`, and `length` to a `Dimension` object. - -# Arguments - - - `dim`: An object specifying the name and potentially indices of a dimension. Can be the - following types: - - + `Symbol`: dimension name. - + `Type{<:DimensionsionalData.Dimension}`: dimension type - + `DimensionsionalData.Dimension`: dimension, potentially with indices - - - `coords`: a collection indexable by dimension name specifying the indices of the given - dimension. If indices are provided, they are used even if `dim` contains its own - indices. If a dimension is missing, its indices are automatically generated. - - `length`: the length of the dimension. If `coords` and `dim` indices are not provided, - then the indices `1:n` are used. 
-""" -function as_dimension end -function as_dimension(dim::Dimensions.Dimension, coords, n) - name = Dimensions.name(dim) - haskey(coords, name) && return Dimensions.rebuild(dim, coords[name]) - Dimensions.val(dim) isa Colon && return Dimensions.rebuild(dim, 1:n) - return dim -end -function as_dimension(dim::Type{<:Dimensions.Dimension}, coords, n) - return as_dimension(dim(1:n), coords, n) -end -function as_dimension(dim::Symbol, coords, n) - return as_dimension(Dimensions.rebuild(Dimensions.key2dim(dim), 1:n), coords, n) -end - -# DimensionalData interop - -for f in [:data, :dims, :refdims, :metadata, :layerdims, :layermetadata] - @eval begin - DimensionalData.$(f)(ds::Dataset) = DimensionalData.$(f)(parent(ds)) - end -end - -# Warning: this is not an API function and probably should be implemented abstractly upstream -DimensionalData.show_after(io, mime, ::Dataset) = nothing - -attributes(data::DimensionalData.AbstractDimStack) = DimensionalData.metadata(data) - -Base.convert(T::Type{<:DimensionalData.DimStack}, data::Dataset) = convert(T, parent(data)) - -function DimensionalData.rebuild(data::Dataset; kwargs...) - return Dataset(DimensionalData.rebuild(parent(data); kwargs...)) -end - -# python interop - -PyObject(data::Dataset) = _to_xarray(data) - -Base.convert(::Type{Dataset}, obj::PyObject) = Dataset(_dimstack_from_xarray(obj)) diff --git a/src/utils.jl b/src/utils.jl index 92ccb256..59ce177e 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -184,22 +184,6 @@ replacemissing(x::AbstractArray{<:AbstractArray}) = map(replacemissing, x) @inline replacemissing(::Missing) = NaN @inline replacemissing(x::Number) = x -""" - flatten(x) - -If `x` is an array of arrays, flatten into a single array whose dimensions are ordered with -dimensions of the outermost container first and innermost container last. 
-""" -flatten(x) = x -flatten(x::AbstractArray{<:Number}) = convert(Array, x) -function flatten(x::AbstractArray{S}) where {T<:Number,N,S<:AbstractArray{T,N}} - ret = Array{T}(undef, (size(x)..., size(x[1])...)) - for k in keys(x) - setindex!(ret, x[k], k, (Colon() for _ in 1:N)...) - end - return ret -end - # Convert python types to Julia types if possible @inline frompytype(x) = x @inline frompytype(x::PyObject) = PyAny(x) @@ -207,12 +191,6 @@ frompytype(x::AbstractArray{PyObject}) = map(frompytype, x) frompytype(x::AbstractArray{Any}) = map(frompytype, x) frompytype(x::AbstractArray{<:AbstractArray}) = map(frompytype, x) -rekey(d, keymap) = Dict(get(keymap, k, k) => d[k] for k in keys(d)) -function rekey(d::NamedTuple, keymap) - new_keys = map(k -> get(keymap, k, k), keys(d)) - return NamedTuple{new_keys}(values(d)) -end - enforce_stat_eltypes(stats) = convert_to_eltypes(stats, sample_stats_eltypes) function convert_to_eltypes(data::Dict, data_eltypes) @@ -292,21 +270,3 @@ function topandas(::Val{:ELPDData}, df) colvals = Array(only(eachrow(df))) return ArviZ.arviz.stats.ELPDData(colvals, rownames) end - -function package_version(pkg::Module) - isdefined(Base, :pkgversion) && return Base.pkgversion(pkg) - project = joinpath(dirname(dirname(pathof(pkg))), "Project.toml") - toml = read(project, String) - m = match(r"(*ANYCRLF)^version\s*=\s\"(.*)\"$"m, toml) - return VersionNumber(m[1]) -end - -struct AsSlice{T<:Dimensions.Selector} <: Dimensions.Selector{T} - val::T -end - -function Dimensions.selectindices(l::Dimensions.LookupArray, sel::AsSlice; kw...) - i = Dimensions.selectindices(l, Dimensions.val(sel); kw...) - inds = i isa AbstractVector ? 
i : [i] - return inds -end diff --git a/src/xarray.jl b/src/xarray.jl index 2f6b874a..3e7bf546 100644 --- a/src/xarray.jl +++ b/src/xarray.jl @@ -1,3 +1,26 @@ +PyObject(data::Dataset) = _to_xarray(data) + +Base.convert(::Type{Dataset}, obj::PyObject) = Dataset(_dimstack_from_xarray(obj)) + +function PyObject(data::InferenceData) + return pycall(arviz.InferenceData, PyObject; map(PyObject, groups(data))...) +end + +function convert_to_inference_data(obj::PyObject; dims=nothing, coords=nothing, kwargs...) + if pyisinstance(obj, arviz.InferenceData) + group_names = obj.groups() + groups = ( + Symbol(name) => convert(Dataset, getindex(obj, name)) for name in group_names + ) + return InferenceData(; groups...) + else + # Python ArviZ requires dims and coords to be dicts mapping names to vectors + pydims = dims === nothing ? dims : Dict(k => collect(dims[k]) for k in keys(dims)) + pycoords = + coords === nothing ? coords : Dict(k => collect(coords[k]) for k in keys(coords)) + return arviz.convert_to_inference_data(obj; dims=pydims, coords=pycoords, kwargs...) 
+ end +end function _dimstack_from_xarray(o::PyObject) pyisinstance(o, xarray.Dataset) || diff --git a/test/InferenceObjects/convert_dataset.jl b/test/InferenceObjects/convert_dataset.jl new file mode 100644 index 00000000..37b46943 --- /dev/null +++ b/test/InferenceObjects/convert_dataset.jl @@ -0,0 +1,58 @@ +using ArviZ.InferenceObjects, DimensionalData, Test + +@testset "conversion to Dataset" begin + @testset "conversion" begin + J = 8 + K = 6 + L = 3 + nchains = 4 + ndraws = 500 + vars = (a=randn(nchains, ndraws, J), b=randn(nchains, ndraws, K, L)) + coords = (bi=2:(K + 1), draw=1:2:1_000) + dims = (b=[:bi, nothing],) + attrs = Dict(:mykey => 5) + ds = namedtuple_to_dataset(vars; library="MyLib", coords, dims, attrs) + @test convert(Dataset, ds) === ds + ds2 = convert(Dataset, [1.0, 2.0, 3.0, 4.0]) + @test ds2 isa Dataset + @test ds2 == convert_to_dataset([1.0, 2.0, 3.0, 4.0]) + @test convert(DimensionalData.DimStack, ds) === parent(ds) + end + + @testset "convert_to_dataset" begin + nchains = 4 + ndraws = 100 + nshared = 3 + xdims = (:chain, :draw, :shared) + x = DimArray(randn(nchains, ndraws, nshared), xdims) + ydims = (:chain, :draw, Dim{:ydim1}(Any["a", "b"]), Dim{:shared}) + y = DimArray(randn(nchains, ndraws, 2, nshared), ydims) + metadata = Dict(:prop1 => "val1", :prop2 => "val2") + ds = Dataset((; x, y); metadata) + + @testset "convert_to_dataset(::Dataset; kwargs...)" begin + @test convert_to_dataset(ds) isa Dataset + @test convert_to_dataset(ds) === ds + end + + @testset "convert_to_dataset(::$T; kwargs...)" for T in (Dict, NamedTuple) + data = (x=randn(4, 100), y=randn(4, 100, 2)) + if T <: Dict + data = T(pairs(data)) + end + ds2 = convert_to_dataset(data) + @test ds2 isa Dataset + @test ds2.x == data[:x] + @test DimensionalData.name(DimensionalData.dims(ds2.x)) == (:chain, :draw) + @test ds2.y == data[:y] + @test DimensionalData.name(DimensionalData.dims(ds2.y)) == + (:chain, :draw, :y_dim_1) + end + + @testset 
"convert_to_dataset(::InferenceData; kwargs...)" begin + idata = random_data() + @test convert_to_dataset(idata) === idata.posterior + @test convert_to_dataset(idata; group=:prior) === idata.prior + end + end +end diff --git a/test/InferenceObjects/convert_inference_data.jl b/test/InferenceObjects/convert_inference_data.jl new file mode 100644 index 00000000..f5d48dd2 --- /dev/null +++ b/test/InferenceObjects/convert_inference_data.jl @@ -0,0 +1,84 @@ +using ArviZ.InferenceObjects, DimensionalData, Test + +@testset "conversion to InferenceData" begin + @testset "default_var_name" begin + x = randn(4, 5) + @test InferenceObjects.default_var_name(x) === :x + @test InferenceObjects.default_var_name(DimensionalData.DimArray(x, (:a, :b))) === + :x + @test InferenceObjects.default_var_name( + DimensionalData.DimArray(x, (:a, :b); name=:y) + ) === :y + end + + @testset "conversion" begin + var_names = (:a, :b) + data_names = (:y,) + coords = ( + chain=1:4, draw=1:100, shared=["s1", "s2", "s3"], dima=1:4, dimb=2:6, dimy=1:5 + ) + dims = (a=(:shared, :dima), b=(:shared, :dimb), y=(:shared, :dimy)) + metadata = (inference_library="PPL",) + posterior = random_dataset(var_names, dims, coords, metadata) + prior = random_dataset(var_names, dims, coords, metadata) + observed_data = random_dataset(data_names, dims, coords, metadata) + group_data = (; prior, observed_data, posterior) + idata = InferenceData(group_data) + @test convert(InferenceData, idata) === idata + @test convert(NamedTuple, idata) === parent(idata) + @test NamedTuple(idata) === parent(idata) + a = idata.posterior.a + @test convert(InferenceData, a) isa InferenceData + @test convert(InferenceData, a).posterior.a == a + end + + @testset "convert_to_inference_data" begin + @testset "convert_to_inference_data(::AbstractDimStack)" begin + ds = namedtuple_to_dataset((x=randn(4, 10), y=randn(4, 10, 5))) + idata1 = convert_to_inference_data(ds; group=:prior) + @test InferenceObjects.groupnames(idata1) == (:prior,) + 
idata2 = InferenceData(; prior=ds) + @test idata2 == idata1 + idata3 = convert_to_inference_data(parent(ds); group=:prior) + @test idata3 == idata1 + end + + @testset "convert_to_inference_data(::$T)" for T in (NamedTuple, Dict) + data = (A=randn(2, 10, 2), B=randn(2, 10, 5, 2)) + if T <: Dict + data = Dict(pairs(data)) + end + idata = convert_to_inference_data(data) + check_idata_schema(idata) + @test InferenceObjects.groupnames(idata) == (:posterior,) + posterior = idata.posterior + @test posterior.A == data[:A] + @test posterior.B == data[:B] + idata2 = convert_to_inference_data(data; group=:prior) + check_idata_schema(idata2) + @test InferenceObjects.groupnames(idata2) == (:prior,) + @test idata2.prior == idata.posterior + end + + @testset "convert_to_inference_data(::$T)" for T in + (Array, DimensionalData.DimArray) + data = randn(2, 10, 2) + if T <: DimensionalData.DimArray + data = DimensionalData.DimArray(data, (:a, :b, :c); name=:y) + end + idata = convert_to_inference_data(data) + check_idata_schema(idata) + @test InferenceObjects.groupnames(idata) == (:posterior,) + posterior = idata.posterior + if T <: DimensionalData.DimArray + @test posterior.y == data + else + @test posterior.x == data + end + idata2 = convert_to_inference_data(data; group=:prior) + check_idata_schema(idata2) + @test InferenceObjects.groupnames(idata2) == (:prior,) + @test idata2.prior == idata.posterior + end + end +end diff --git a/test/test_dataset.jl b/test/InferenceObjects/dataset.jl similarity index 50% rename from test/test_dataset.jl rename to test/InferenceObjects/dataset.jl index 87b09a21..f4ef9e31 100644 --- a/test/test_dataset.jl +++ b/test/InferenceObjects/dataset.jl @@ -1,7 +1,7 @@ -using ArviZ, DimensionalData, OrderedCollections, PyCall, Test +using ArviZ.InferenceObjects, DimensionalData, OrderedCollections, Test @testset "dataset" begin - @testset "ArviZ.Dataset" begin + @testset "Dataset" begin @testset "Constructors" begin nchains = 4 ndraws = 100 @@ -14,8 +14,8 
@@ using ArviZ, DimensionalData, OrderedCollections, PyCall, Test @testset "from NamedTuple" begin data = (; x, y) - ds = ArviZ.Dataset(data; metadata) - @test ds isa ArviZ.Dataset + ds = Dataset(data; metadata) + @test ds isa Dataset @test DimensionalData.data(ds) == data for dim in xdims @test DimensionalData.hasdim(ds, dim) @@ -35,8 +35,8 @@ using ArviZ, DimensionalData, OrderedCollections, PyCall, Test data = ( DimensionalData.rebuild(x; name=:x), DimensionalData.rebuild(y; name=:y) ) - ds = ArviZ.Dataset(data...; metadata) - @test ds isa ArviZ.Dataset + ds = Dataset(data...; metadata) + @test ds isa Dataset @test values(DimensionalData.data(ds)) == data for dim in xdims @test DimensionalData.hasdim(ds, dim) @@ -53,8 +53,8 @@ using ArviZ, DimensionalData, OrderedCollections, PyCall, Test end @testset "idempotent" begin - ds = ArviZ.Dataset((; x, y); metadata) - @test ArviZ.Dataset(ds) === ds + ds = Dataset((; x, y); metadata) + @test Dataset(ds) === ds end @testset "errors with mismatched dimensions" begin @@ -62,7 +62,7 @@ using ArviZ, DimensionalData, OrderedCollections, PyCall, Test x=DimArray(randn(3, 100, 3), (:chains, :draws, :shared)), y=DimArray(randn(4, 100, 2, 3), (:chains, :draws, :ydim1, :shared)), ) - @test_throws Exception ArviZ.Dataset(data_bad) + @test_throws Exception Dataset(data_bad) end end @@ -74,7 +74,7 @@ using ArviZ, DimensionalData, OrderedCollections, PyCall, Test ydims = (:chain, :draw, :ydim1, :shared) y = DimArray(randn(nchains, ndraws, 2, nshared), ydims) metadata = Dict(:prop1 => "val1", :prop2 => "val2") - ds = ArviZ.Dataset((; x, y); metadata) + ds = Dataset((; x, y); metadata) @testset "parent" begin @test parent(ds) isa DimStack @@ -102,71 +102,15 @@ using ArviZ, DimensionalData, OrderedCollections, PyCall, Test end @testset "attributes" begin - @test ArviZ.attributes(ds) == metadata + @test InferenceObjects.attributes(ds) == metadata dscopy = deepcopy(ds) - ArviZ.setattribute!(dscopy, :prop3, "val3") - @test 
ArviZ.attributes(dscopy)[:prop3] == "val3" - @test_deprecated ArviZ.setattribute!(dscopy, "prop3", "val4") - @test ArviZ.attributes(dscopy)[:prop3] == "val4" - end - - @testset "conversion" begin - @test convert(ArviZ.Dataset, ds) === ds - ds2 = convert(ArviZ.Dataset, [1.0, 2.0, 3.0, 4.0]) - @test ds2 isa ArviZ.Dataset - @test ds2 == ArviZ.convert_to_dataset([1.0, 2.0, 3.0, 4.0]) - @test convert(DimensionalData.DimStack, ds) === parent(ds) + InferenceObjects.setattribute!(dscopy, :prop3, "val3") + @test InferenceObjects.attributes(dscopy)[:prop3] == "val3" + @test_deprecated InferenceObjects.setattribute!(dscopy, "prop3", "val4") + @test InferenceObjects.attributes(dscopy)[:prop3] == "val4" end end - @testset "Dataset <-> xarray" begin - nchains = 4 - ndraws = 100 - nshared = 3 - xdims = (:chain, :draw, :shared) - x = DimArray(randn(nchains, ndraws, nshared), xdims) - ydims = (:chain, :draw, Dim{:ydim1}(Any["a", "b"]), :shared) - y = DimArray(randn(nchains, ndraws, 2, nshared), ydims) - metadata = Dict(:prop1 => "val1", :prop2 => "val2") - ds = ArviZ.Dataset((; x, y); metadata) - o = PyObject(ds) - @test o isa PyObject - @test pyisinstance(o, ArviZ.xarray.Dataset) - - @test issetequal(Symbol.(o.coords.keys()), (:chain, :draw, :shared, :ydim1)) - for (dim, coord) in o.coords.items() - @test collect(coord.values) == DimensionalData.index(ds, Symbol(dim)) - end - - variables = Dict(collect(o.data_vars.variables.items())) - @test "x" ∈ keys(variables) - @test x == variables["x"].values - @test variables["x"].dims == String.(xdims) - - @test "y" ∈ keys(variables) - @test y == variables["y"].values - @test variables["y"].dims == ("chain", "draw", "ydim1", "shared") - - # check that the Python object accesses the underlying Julia array - x[1] = 1 - @test x == variables["x"].values - - ds2 = convert(ArviZ.Dataset, o) - @test ds2 isa ArviZ.Dataset - @test ds2.x ≈ ds.x - @test ds2.y ≈ ds.y - dims1 = sort(collect(DimensionalData.dims(ds)); by=DimensionalData.name) - dims2 = 
sort(collect(DimensionalData.dims(ds2)); by=DimensionalData.name) - for (dim1, dim2) in zip(dims1, dims2) - @test DimensionalData.name(dim1) === DimensionalData.name(dim2) - @test DimensionalData.index(dim1) == DimensionalData.index(dim2) - if DimensionalData.index(dim1) isa AbstractRange - @test DimensionalData.index(dim2) isa AbstractRange - end - end - @test DimensionalData.metadata(ds2) == DimensionalData.metadata(ds) - end - @testset "namedtuple_to_dataset" begin J = 8 K = 6 @@ -190,11 +134,11 @@ using ArviZ, DimensionalData, OrderedCollections, PyCall, Test ), ) attrs = Dict(:mykey => 5) - @test_broken @inferred ArviZ.namedtuple_to_dataset( + @test_broken @inferred namedtuple_to_dataset( vars; library="MyLib", coords, dims, attrs ) - ds = ArviZ.namedtuple_to_dataset(vars; library="MyLib", coords, dims, attrs) - @test ds isa ArviZ.Dataset + ds = namedtuple_to_dataset(vars; library="MyLib", coords, dims, attrs) + @test ds isa Dataset for (var_name, var_data) in pairs(DimensionalData.layers(ds)) @test var_data isa DimensionalData.DimArray @test var_name === DimensionalData.name(var_data) @@ -205,41 +149,8 @@ using ArviZ, DimensionalData, OrderedCollections, PyCall, Test metadata = DimensionalData.metadata(ds) @test metadata isa OrderedDict @test haskey(metadata, :created_at) - @test haskey(metadata, :arviz_version) - @test metadata[:arviz_language] == "julia" @test metadata[:inference_library] == "MyLib" @test !haskey(metadata, :inference_library_version) @test metadata[:mykey] == 5 end - - @testset "ArviZ.convert_to_dataset" begin - nchains = 4 - ndraws = 100 - nshared = 3 - xdims = (:chain, :draw, :shared) - x = DimArray(randn(nchains, ndraws, nshared), xdims) - ydims = (:chain, :draw, Dim{:ydim1}(Any["a", "b"]), Dim{:shared}) - y = DimArray(randn(nchains, ndraws, 2, nshared), ydims) - metadata = Dict(:prop1 => "val1", :prop2 => "val2") - ds = ArviZ.Dataset((; x, y); metadata) - - @testset "ArviZ.convert_to_dataset(::ArviZ.Dataset; kwargs...)" begin - @test 
ArviZ.convert_to_dataset(ds) isa ArviZ.Dataset - @test ArviZ.convert_to_dataset(ds) === ds - end - - @testset "ArviZ.convert_to_dataset(::$T; kwargs...)" for T in (Dict, NamedTuple) - data = (x=randn(4, 100), y=randn(4, 100, 2)) - if T <: Dict - data = T(pairs(data)) - end - ds2 = ArviZ.convert_to_dataset(data) - @test ds2 isa ArviZ.Dataset - @test ds2.x == data[:x] - @test DimensionalData.name(DimensionalData.dims(ds2.x)) == (:chain, :draw) - @test ds2.y == data[:y] - @test DimensionalData.name(DimensionalData.dims(ds2.y)) == - (:chain, :draw, :y_dim_1) - end - end end diff --git a/test/InferenceObjects/dimensions.jl b/test/InferenceObjects/dimensions.jl new file mode 100644 index 00000000..7b3be715 --- /dev/null +++ b/test/InferenceObjects/dimensions.jl @@ -0,0 +1,115 @@ +using ArviZ.InferenceObjects, DimensionalData, OffsetArrays, Test + +@testset "dimension-related functions" begin + @testset "has_all_sample_dims" begin + @test !InferenceObjects.has_all_sample_dims((:chain,)) + @test !InferenceObjects.has_all_sample_dims((:draw,)) + @test InferenceObjects.has_all_sample_dims((:chain, :draw)) + @test InferenceObjects.has_all_sample_dims((:draw, :chain)) + @test InferenceObjects.has_all_sample_dims((:draw, :chain, :x)) + + @test !InferenceObjects.has_all_sample_dims((Dim{:chain},)) + @test !InferenceObjects.has_all_sample_dims((Dim{:draw},)) + @test InferenceObjects.has_all_sample_dims((Dim{:chain}, Dim{:draw})) + @test InferenceObjects.has_all_sample_dims((Dim{:draw}, Dim{:chain})) + @test InferenceObjects.has_all_sample_dims((Dim{:draw}, Dim{:chain}, Dim{:x})) + + @test !InferenceObjects.has_all_sample_dims((Dim{:chain}(1:4),)) + @test !InferenceObjects.has_all_sample_dims((Dim{:draw}(1:10),)) + @test InferenceObjects.has_all_sample_dims((Dim{:chain}(1:4), Dim{:draw}(1:10))) + @test InferenceObjects.has_all_sample_dims((Dim{:draw}(1:10), Dim{:chain}(1:4))) + @test InferenceObjects.has_all_sample_dims(( + Dim{:draw}(1:10), Dim{:chain}(1:4), Dim{:x}(1:2) + )) + 
end + + @testset "as_dimension" begin + coords = (;) + @testset for dim in (:foo, Dim{:foo}, Dim{:foo,Colon}) + @test InferenceObjects.as_dimension(dim, coords, 2:10) === Dim{:foo}(2:10) + dim === :foo || @inferred InferenceObjects.as_dimension(dim, coords, 2:10) + end + @test InferenceObjects.as_dimension(Dim{:foo}(1:5), coords, 2:10) === Dim{:foo}(1:5) + coords = (; foo=3:8) + @testset for dim in (:foo, Dim{:foo}, Dim{:foo,Colon}, Dim{:foo}(1:5)) + @test InferenceObjects.as_dimension(dim, coords, 2:10) === Dim{:foo}(3:8) + dim === :foo || @inferred InferenceObjects.as_dimension(dim, coords, 2:10) + end + end + + @testset "generate_dims" begin + x = OffsetArray(randn(4, 10, 2, 3), 0:3, 11:20, -1:0, 2:4) + gdims = @inferred NTuple{4,Dimensions.Dimension} InferenceObjects.generate_dims( + x, :x + ) + @test gdims isa NTuple{4,Dim} + @test Dimensions.name(gdims) === (:x_dim_1, :x_dim_2, :x_dim_3, :x_dim_4) + @test Dimensions.index(gdims) == (0:3, 11:20, -1:0, 2:4) + + gdims = @inferred NTuple{4,Dimensions.Dimension} InferenceObjects.generate_dims( + x, :y; dims=(:a, :b) + ) + @test gdims isa NTuple{4,Dim} + @test Dimensions.name(gdims) === (:a, :b, :y_dim_3, :y_dim_4) + @test Dimensions.index(gdims) == (0:3, 11:20, -1:0, 2:4) + + gdims = @inferred NTuple{4,Dimensions.Dimension} InferenceObjects.generate_dims( + x, :z; dims=(:c, :d), default_dims=(:chain, :draw) + ) + @test gdims isa NTuple{4,Dim} + @test Dimensions.name(gdims) === (:chain, :draw, :c, :d) + @test Dimensions.index(gdims) == (0:3, 11:20, -1:0, 2:4) + end + + @testset "array_to_dim_array" begin + x = OffsetArray(randn(4, 10, 2, 3), 0:3, 11:20, -1:0, 2:4) + da = @inferred DimArray InferenceObjects.array_to_dimarray(x, :x) + @test da == x + @test DimensionalData.name(da) === :x + gdims = Dimensions.dims(da) + @test gdims isa NTuple{4,Dim} + @test Dimensions.name(gdims) === (:x_dim_1, :x_dim_2, :x_dim_3, :x_dim_4) + @test Dimensions.index(gdims) == (0:3, 11:20, -1:0, 2:4) + + da = @inferred DimArray 
InferenceObjects.array_to_dimarray(x, :y; dims=(:a, :b)) + @test da == x + @test DimensionalData.name(da) === :y + gdims = Dimensions.dims(da) + @test gdims isa NTuple{4,Dim} + @test Dimensions.name(gdims) === (:a, :b, :y_dim_3, :y_dim_4) + @test Dimensions.index(gdims) == (0:3, 11:20, -1:0, 2:4) + + da = @inferred DimArray InferenceObjects.array_to_dimarray( + x, :z; dims=(:c, :d), default_dims=(:chain, :draw) + ) + @test da == x + @test DimensionalData.name(da) === :z + gdims = Dimensions.dims(da) + @test gdims isa NTuple{4,Dim} + @test Dimensions.name(gdims) === (:chain, :draw, :c, :d) + @test Dimensions.index(gdims) == (0:3, 11:20, -1:0, 2:4) + end + + @testset "AsSlice" begin + da = DimArray(randn(2), Dim{:a}(["foo", "bar"])) + @test da[a=At("foo")] == da[1] + da_sel = @inferred da[a=InferenceObjects.AsSlice(At("foo"))] + @test da_sel isa DimArray + @test Dimensions.dims(da_sel) == (Dim{:a}(["foo"]),) + @test da_sel == da[a=At(["foo"])] + + da_sel = @inferred da[a=At(["foo", "bar"])] + @test da_sel isa DimArray + @test Dimensions.dims(da_sel) == Dimensions.dims(da) + @test da_sel == da + end + + @testset "index_to_indices" begin + @test InferenceObjects.index_to_indices(1) == [1] + @test InferenceObjects.index_to_indices(2) == [2] + @test InferenceObjects.index_to_indices([2]) == [2] + @test InferenceObjects.index_to_indices(1:10) === 1:10 + @test InferenceObjects.index_to_indices(At(1)) === InferenceObjects.AsSlice(At(1)) + @test InferenceObjects.index_to_indices(At(1)) === InferenceObjects.AsSlice(At(1)) + end +end diff --git a/test/test_namedtuple.jl b/test/InferenceObjects/from_namedtuple.jl similarity index 100% rename from test/test_namedtuple.jl rename to test/InferenceObjects/from_namedtuple.jl diff --git a/test/InferenceObjects/inference_data.jl b/test/InferenceObjects/inference_data.jl new file mode 100644 index 00000000..a8e3ab16 --- /dev/null +++ b/test/InferenceObjects/inference_data.jl @@ -0,0 +1,127 @@ +using ArviZ.InferenceObjects, 
DimensionalData, Test + +@testset "InferenceData" begin + var_names = (:a, :b) + data_names = (:y,) + coords = ( + chain=1:4, draw=1:100, shared=["s1", "s2", "s3"], dima=1:4, dimb=2:6, dimy=1:5 + ) + dims = (a=(:shared, :dima), b=(:shared, :dimb), y=(:shared, :dimy)) + metadata = (inference_library="PPL",) + posterior = random_dataset(var_names, dims, coords, metadata) + prior = random_dataset(var_names, dims, coords, metadata) + observed_data = random_dataset(data_names, dims, coords, metadata) + group_data = (; prior, observed_data, posterior) + group_data_ordered = (; posterior, prior, observed_data) + + @testset "constructors" begin + idata = @inferred(InferenceData(group_data)) + @test idata isa InferenceData + @test getfield(idata, :groups) === group_data_ordered + + @test InferenceData(; group_data...) == idata + @test InferenceData(idata) === idata + end + + idata = InferenceData(group_data) + + @testset "properties" begin + @test propertynames(idata) === propertynames(group_data_ordered) + @test getproperty(idata, :posterior) === posterior + @test getproperty(idata, :prior) === prior + @test hasproperty(idata, :posterior) + @test hasproperty(idata, :prior) + @test !hasproperty(idata, :prior_predictive) + end + + @testset "iteration" begin + @test keys(idata) === keys(group_data_ordered) + @test haskey(idata, :posterior) + @test haskey(idata, :prior) + @test !haskey(idata, :log_likelihood) + @test values(idata) === values(group_data_ordered) + @test pairs(idata) isa Base.Iterators.Pairs + @test pairs(idata) === pairs(group_data_ordered) + @test length(idata) == length(group_data_ordered) + @test iterate(idata) === iterate(group_data_ordered) + for i in 1:(length(idata) + 1) + @test iterate(idata, i) === iterate(group_data_ordered, i) + end + @test eltype(idata) <: Dataset + @test collect(idata) isa Vector{<:Dataset} + end + + @testset "indexing" begin + @test idata[:posterior] === posterior + @test idata[:prior] === prior + @test idata[1] === posterior + 
@test idata[2] === prior + + idata_sel = idata[dima=At(2:3), dimb=At(6)] + @test idata_sel isa InferenceData + @test InferenceObjects.groupnames(idata_sel) === InferenceObjects.groupnames(idata) + @test Dimensions.index(idata_sel.posterior, :dima) == 2:3 + @test Dimensions.index(idata_sel.prior, :dima) == 2:3 + @test Dimensions.index(idata_sel.posterior, :dimb) == [6] + @test Dimensions.index(idata_sel.prior, :dimb) == [6] + + if VERSION ≥ v"1.7" + idata_sel = idata[(:posterior, :observed_data), dimy=1, dimb=1, shared=At("s1")] + @test idata_sel isa InferenceData + @test InferenceObjects.groupnames(idata_sel) === (:posterior, :observed_data) + @test Dimensions.index(idata_sel.posterior, :dima) == coords.dima + @test Dimensions.index(idata_sel.posterior, :dimb) == coords.dimb[[1]] + @test Dimensions.index(idata_sel.posterior, :shared) == ["s1"] + @test Dimensions.index(idata_sel.observed_data, :dimy) == coords.dimy[[1]] + @test Dimensions.index(idata_sel.observed_data, :shared) == ["s1"] + end + + ds_sel = idata[:posterior, chain=1] + @test ds_sel isa Dataset + @test !hasdim(ds_sel, :chain) + + idata2 = Base.setindex(idata, posterior, :warmup_posterior) + @test keys(idata2) === (keys(idata)..., :warmup_posterior) + @test idata2[:warmup_posterior] === posterior + end + + @testset "isempty" begin + @test !isempty(idata) + @test isempty(InferenceData()) + end + + @testset "groups" begin + @test InferenceObjects.groups(idata) === group_data_ordered + @test InferenceObjects.groups(InferenceData(; prior)) === (; prior) + end + + @testset "hasgroup" begin + @test InferenceObjects.hasgroup(idata, :posterior) + @test InferenceObjects.hasgroup(idata, :prior) + @test !InferenceObjects.hasgroup(idata, :prior_predictive) + end + + @testset "groupnames" begin + @test InferenceObjects.groupnames(idata) === propertynames(group_data_ordered) + @test InferenceObjects.groupnames(InferenceData(; posterior)) === (:posterior,) + end + + @testset "show" begin + @testset "plain" begin + 
text = sprint(show, MIME("text/plain"), idata) + @test text == """ + InferenceData with groups: + > posterior + > prior + > observed_data""" + end + + @testset "html" begin + # TODO: improve test + text = sprint(show, MIME("text/html"), idata) + @test text isa String + @test occursin("InferenceData", text) + @test occursin("Dataset", text) + end + end +end diff --git a/test/InferenceObjects/runtests.jl b/test/InferenceObjects/runtests.jl new file mode 100644 index 00000000..20970b1a --- /dev/null +++ b/test/InferenceObjects/runtests.jl @@ -0,0 +1,12 @@ +using ArviZ.InferenceObjects, Test + +@testset "InferenceObjects" begin + include("test_helpers.jl") + include("utils.jl") + include("dimensions.jl") + include("dataset.jl") + include("inference_data.jl") + include("convert_dataset.jl") + include("convert_inference_data.jl") + include("from_namedtuple.jl") +end diff --git a/test/InferenceObjects/test_helpers.jl b/test/InferenceObjects/test_helpers.jl new file mode 100644 index 00000000..68ad03e4 --- /dev/null +++ b/test/InferenceObjects/test_helpers.jl @@ -0,0 +1,123 @@ +using ArviZ.InferenceObjects, DimensionalData + +function random_dim_array(var_name, dims, coords, default_dims=()) + _dims = (default_dims..., dims...) + _coords = NamedTuple{_dims}(getproperty.(Ref(coords), _dims)) + size = map(length, values(_coords)) + data = randn(size) + return DimArray(data, _coords; name=var_name) +end + +function random_dim_stack(var_names, dims, coords, metadata, default_dims=(:chain, :draw)) + dim_arrays = map(var_names) do k + return random_dim_array(k, getproperty(dims, k), coords, default_dims) + end + return DimStack(dim_arrays...; metadata) +end + +random_dataset(args...) 
= Dataset(random_dim_stack(args...)) + +function random_data() + var_names = (:a, :b) + data_names = (:y,) + coords = ( + chain=1:4, draw=1:100, shared=["s1", "s2", "s3"], dima=1:4, dimb=2:6, dimy=1:5 + ) + dims = (a=(:shared, :dima), b=(:shared, :dimb), y=(:shared, :dimy)) + metadata = (inference_library="PPL",) + posterior = random_dataset(var_names, dims, coords, metadata) + posterior_predictive = random_dataset(data_names, dims, coords, metadata) + prior = random_dataset(var_names, dims, coords, metadata) + prior_predictive = random_dataset(data_names, dims, coords, metadata) + observed_data = random_dataset(data_names, dims, coords, metadata, ()) + return InferenceData(; + posterior, posterior_predictive, prior, prior_predictive, observed_data + ) +end + +function check_idata_schema(idata) + @testset "check InferenceData schema" begin + @test idata isa InferenceData + @testset "$name" for (name, group) in pairs(idata) + @test name ∈ InferenceObjects.SCHEMA_GROUPS + @test group isa Dataset + for (var_name, var_data) in pairs(group) + @test var_data isa DimensionalData.AbstractDimArray + if InferenceObjects.has_all_sample_dims(var_data) + @test Dimensions.name(Dimensions.dims(var_data)[1]) === :chain + @test Dimensions.name(Dimensions.dims(var_data)[2]) === :draw + end + end + @testset "attributes" begin + attrs = InferenceObjects.attributes(group) + @test attrs isa AbstractDict{Symbol,Any} + @test :created_at in keys(attrs) + end + end + end +end + +function test_idata_approx_equal( + idata1::InferenceData, idata2::InferenceData; check_metadata=true +) + @test InferenceObjects.groupnames(idata1) === InferenceObjects.groupnames(idata2) + for (ds1, ds2) in zip(idata1, idata2) + @test issetequal(keys(ds1), keys(ds2)) + for var_name in keys(ds1) + da1 = ds1[var_name] + da2 = ds2[var_name] + @test da1 ≈ da2 + dims1 = DimensionalData.dims(da1) + dims2 = DimensionalData.dims(da2) + @test DimensionalData.name(dims1) == DimensionalData.name(dims2) + @test 
DimensionalData.index(dims1) == DimensionalData.index(dims2) + end + if check_metadata + metadata1 = DimensionalData.metadata(ds1) + metadata2 = DimensionalData.metadata(ds2) + @test issetequal(keys(metadata1), keys(metadata2)) + for k in keys(metadata1) + Symbol(k) === :created_at && continue + @test metadata1[k] == metadata2[k] + end + end + end +end + +function test_idata_group_correct( + idata, + group_name, + var_names; + library=nothing, + dims=(;), + coords=(;), + default_dims=(:chain, :draw), +) + @test idata isa InferenceData + @test InferenceObjects.hasgroup(idata, group_name) + ds = getproperty(idata, group_name) + @test ds isa Dataset + @test issetequal(keys(ds), var_names) + for name in var_names + da = ds[name] + @test DimensionalData.name(da) === name + _dims = DimensionalData.dims(da) + _dim_names_exp = (default_dims..., get(dims, name, ())...) + _dim_names = DimensionalData.name(_dims) + @test issubset(_dim_names_exp, _dim_names) + for dim in _dims + dim_name = DimensionalData.name(dim) + if dim_name ∈ keys(coords) + @test coords[dim_name] == DimensionalData.index(dim) + end + end + end + metadata = DimensionalData.metadata(ds) + if library !== nothing + @test metadata[:inference_library] == library + end + for k in [:created_at] + @test k in keys(metadata) + end + return nothing +end diff --git a/test/InferenceObjects/utils.jl b/test/InferenceObjects/utils.jl new file mode 100644 index 00000000..c3f7e59e --- /dev/null +++ b/test/InferenceObjects/utils.jl @@ -0,0 +1,44 @@ +using ArviZ, Test +using ArviZ.InferenceObjects + +module TestSubModule end + +@testset "utils" begin + @testset "flatten" begin + @test InferenceObjects.flatten([1, 2]) == [1, 2] + @test InferenceObjects.flatten([[1, 2]]) == [1 2] + @test InferenceObjects.flatten([[1, 3], [2, 4]]) == reshape(1:4, 2, 2) + @test InferenceObjects.flatten(1) === 1 + @test InferenceObjects.flatten(1:5) isa Array + end + + @testset "namedtuple_of_arrays" begin + @test 
InferenceObjects.namedtuple_of_arrays((x=3, y=4)) === (x=3, y=4) + @test InferenceObjects.namedtuple_of_arrays([(x=3, y=4), (x=5, y=6)]) == + (x=[3, 5], y=[4, 6]) + @test InferenceObjects.namedtuple_of_arrays([ + [(x=3, y=4), (x=5, y=6)], [(x=7, y=8), (x=9, y=10)] + ]) == (x=[3 5; 7 9], y=[4 6; 8 10]) + end + + @testset "package_version" begin + @test InferenceObjects.package_version(ArviZ) isa VersionNumber + @test InferenceObjects.package_version(TestSubModule) === nothing + end + + @testset "rekey" begin + orig = (x=3, y=4, z=5) + keymap = (x=:y, y=:a) + @testset "NamedTuple" begin + new = @inferred NamedTuple InferenceObjects.rekey(orig, keymap) + @test new isa NamedTuple + @test new == (y=3, a=4, z=5) + end + @testset "Dict" begin + orig_dict = Dict(pairs(orig)) + new = @inferred InferenceObjects.rekey(orig_dict, keymap) + @test new isa typeof(orig_dict) + @test new == Dict(:y => 3, :a => 4, :z => 5) + end + end +end diff --git a/test/Project.toml b/test/Project.toml index e9bc6620..cebd2bb2 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -3,6 +3,7 @@ CmdStan = "593b3428-ca2f-500c-ae53-031589ec8ddd" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" +OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee" @@ -17,6 +18,7 @@ CmdStan = "5.2.3, 6.0" DataFrames = "0.20, 0.21, 0.22, 1.0" DimensionalData = "0.20" MCMCChains = "0.3.15, 0.4, 1.0, 2.0, 3.0, 4.0, 5.0" +OffsetArrays = "1" OrderedCollections = "1" PyCall = "1.91.2" PyPlot = "2.8.2" diff --git a/test/helpers.jl b/test/helpers.jl index c74ced77..a437799c 100644 --- a/test/helpers.jl +++ b/test/helpers.jl @@ -115,7 +115,7 @@ function check_idata_schema(idata) @test group isa ArviZ.Dataset for (var_name, var_data) in pairs(group) @test 
var_data isa DimensionalData.AbstractDimArray - if ArviZ.has_all_sample_dims(var_data) + if ArviZ.InferenceObjects.has_all_sample_dims(var_data) @test Dimensions.name(Dimensions.dims(var_data)[1]) === :chain @test Dimensions.name(Dimensions.dims(var_data)[2]) === :draw end @@ -124,10 +124,8 @@ function check_idata_schema(idata) attrs = ArviZ.attributes(group) @test attrs isa AbstractDict{Symbol,Any} @test :created_at in keys(attrs) - @test :arviz_version in keys(attrs) @test_skip :inference_library in keys(attrs) @test_skip :inference_library_version in keys(attrs) - @test_skip :arviz_language in keys(attrs) end end end @@ -192,7 +190,7 @@ function test_idata_group_correct( if library !== nothing @test metadata[:inference_library] == library end - for k in [:created_at, :arviz_version] + for k in [:created_at] @test k in keys(metadata) end return nothing diff --git a/test/runtests.jl b/test/runtests.jl index e016fee0..9e624a93 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,17 +1,17 @@ using ArviZ using Test +include("InferenceObjects/runtests.jl") + @testset "ArviZ" begin include("helpers.jl") include("test_rcparams.jl") include("test_utils.jl") include("test_backend.jl") - include("test_dataset.jl") include("test_data.jl") include("test_diagnostics.jl") include("test_stats.jl") include("test_plots.jl") - include("test_namedtuple.jl") include("test_samplechains.jl") include("test_mcmcchains.jl") end diff --git a/test/test_data.jl b/test/test_data.jl index bea12c48..8bfca38c 100644 --- a/test/test_data.jl +++ b/test/test_data.jl @@ -1,157 +1,5 @@ using ArviZ, DimensionalData, Test -@testset "InferenceData" begin - var_names = (:a, :b) - data_names = (:y,) - coords = ( - chain=1:4, draw=1:100, shared=["s1", "s2", "s3"], dima=1:4, dimb=2:6, dimy=1:5 - ) - dims = (a=(:shared, :dima), b=(:shared, :dimb), y=(:shared, :dimy)) - metadata = (inference_library="PPL",) - posterior = random_dataset(var_names, dims, coords, metadata) - prior = 
random_dataset(var_names, dims, coords, metadata) - observed_data = random_dataset(data_names, dims, coords, metadata) - group_data = (; prior, observed_data, posterior) - group_data_ordered = (; posterior, prior, observed_data) - - @testset "constructors" begin - idata = @inferred(InferenceData(group_data)) - @test idata isa InferenceData - @test getfield(idata, :groups) === group_data_ordered - - @test InferenceData(; group_data...) == idata - @test InferenceData(idata) === idata - end - - idata = InferenceData(group_data) - - @testset "properties" begin - @test propertynames(idata) === propertynames(group_data_ordered) - @test getproperty(idata, :posterior) === posterior - @test getproperty(idata, :prior) === prior - @test hasproperty(idata, :posterior) - @test hasproperty(idata, :prior) - @test !hasproperty(idata, :prior_predictive) - end - - @testset "iteration" begin - @test keys(idata) === keys(group_data_ordered) - @test haskey(idata, :posterior) - @test haskey(idata, :prior) - @test !haskey(idata, :log_likelihood) - @test values(idata) === values(group_data_ordered) - @test pairs(idata) isa Base.Iterators.Pairs - @test pairs(idata) === pairs(group_data_ordered) - @test length(idata) == length(group_data_ordered) - @test iterate(idata) === iterate(group_data_ordered) - for i in 1:(length(idata) + 1) - @test iterate(idata, i) === iterate(group_data_ordered, i) - end - @test eltype(idata) <: ArviZ.Dataset - @test collect(idata) isa Vector{<:ArviZ.Dataset} - end - - @testset "indexing" begin - @test idata[:posterior] === posterior - @test idata[:prior] === prior - @test idata[1] === posterior - @test idata[2] === prior - - idata_sel = idata[dima=At(2:3), dimb=At(6)] - @test idata_sel isa InferenceData - @test ArviZ.groupnames(idata_sel) === ArviZ.groupnames(idata) - @test Dimensions.index(idata_sel.posterior, :dima) == 2:3 - @test Dimensions.index(idata_sel.prior, :dima) == 2:3 - @test Dimensions.index(idata_sel.posterior, :dimb) == [6] - @test 
Dimensions.index(idata_sel.prior, :dimb) == [6] - - if VERSION ≥ v"1.7" - idata_sel = idata[(:posterior, :observed_data), dimy=1, dimb=1, shared=At("s1")] - @test idata_sel isa InferenceData - @test ArviZ.groupnames(idata_sel) === (:posterior, :observed_data) - @test Dimensions.index(idata_sel.posterior, :dima) == coords.dima - @test Dimensions.index(idata_sel.posterior, :dimb) == coords.dimb[[1]] - @test Dimensions.index(idata_sel.posterior, :shared) == ["s1"] - @test Dimensions.index(idata_sel.observed_data, :dimy) == coords.dimy[[1]] - @test Dimensions.index(idata_sel.observed_data, :shared) == ["s1"] - end - - ds_sel = idata[:posterior, chain=1] - @test ds_sel isa ArviZ.Dataset - @test !hasdim(ds_sel, :chain) - - idata2 = Base.setindex(idata, posterior, :warmup_posterior) - @test keys(idata2) === (keys(idata)..., :warmup_posterior) - @test idata2[:warmup_posterior] === posterior - end - - @testset "isempty" begin - @test !isempty(idata) - @test isempty(InferenceData()) - end - - @testset "groups" begin - @test ArviZ.groups(idata) === group_data_ordered - @test ArviZ.groups(InferenceData(; prior)) === (; prior) - end - - @testset "hasgroup" begin - @test ArviZ.hasgroup(idata, :posterior) - @test ArviZ.hasgroup(idata, :prior) - @test !ArviZ.hasgroup(idata, :prior_predictive) - end - - @testset "groupnames" begin - @test ArviZ.groupnames(idata) === propertynames(group_data_ordered) - @test ArviZ.groupnames(InferenceData(; posterior)) === (:posterior,) - end - - @testset "conversion" begin - @test convert(InferenceData, idata) === idata - @test convert(NamedTuple, idata) === parent(idata) - @test NamedTuple(idata) === parent(idata) - a = idata.posterior.a - @test convert(InferenceData, a) isa InferenceData - @test convert(InferenceData, a).posterior.a == a - end - - @testset "show" begin - @testset "plain" begin - text = sprint(show, MIME("text/plain"), idata) - @test text == """ - InferenceData with groups: - > posterior - > prior - > observed_data""" - end - - 
@testset "html" begin - # TODO: improve test - text = sprint(show, MIME("text/html"), idata) - @test text isa String - @test occursin("InferenceData", text) - @test occursin("Dataset", text) - end - end -end - -@testset "InferenceData <-> PyObject" begin - idata1 = random_data() - pyidata1 = PyObject(idata1) - @test pyidata1 isa PyObject - @test pyisinstance(pyidata1, ArviZ.arviz.InferenceData) - idata2 = convert(InferenceData, pyidata1) - test_idata_approx_equal(idata2, idata1) -end - -@testset "convert_to_inference_data(obj::PyObject)" begin - data = Dict(:z => randn(4, 100, 10)) - idata1 = convert_to_inference_data(data) - idata2 = convert_to_inference_data(PyObject(data)) - @test idata2 isa InferenceData - @test idata2.posterior.z ≈ collect(idata1.posterior.z) -end - @testset "extract_dataset" begin idata = random_data() post = extract_dataset(idata, :posterior; combined=false) @@ -183,68 +31,6 @@ end test_idata_approx_equal(new_idata1, new_idata2) end -@testset "ArviZ.convert_to_dataset(::InferenceData; kwargs...)" begin - idata = random_data() - @test ArviZ.convert_to_dataset(idata) === idata.posterior - @test ArviZ.convert_to_dataset(idata; group=:prior) === idata.prior -end - -@testset "ArviZ.default_var_name" begin - x = randn(4, 5) - @test ArviZ.default_var_name(x) === :x - @test ArviZ.default_var_name(DimensionalData.DimArray(x, (:a, :b))) === :x - @test ArviZ.default_var_name(DimensionalData.DimArray(x, (:a, :b); name=:y)) === :y -end - -@testset "convert_to_inference_data" begin - @testset "convert_to_inference_data(::AbstractDimStack)" begin - ds = ArviZ.namedtuple_to_dataset((x=randn(4, 10), y=randn(4, 10, 5))) - idata1 = convert_to_inference_data(ds; group=:prior) - @test ArviZ.groupnames(idata1) == (:prior,) - idata2 = InferenceData(; prior=ds) - @test idata2 == idata1 - idata3 = convert_to_inference_data(parent(ds); group=:prior) - @test idata3 == idata1 - end - - @testset "convert_to_inference_data(::$T)" for T in (NamedTuple, Dict) - data = 
(A=randn(2, 10, 2), B=randn(2, 10, 5, 2)) - if T <: Dict - data = Dict(pairs(data)) - end - idata = convert_to_inference_data(data) - check_idata_schema(idata) - @test ArviZ.groupnames(idata) == (:posterior,) - posterior = idata.posterior - @test posterior.A == data[:A] - @test posterior.B == data[:B] - idata2 = convert_to_inference_data(data; group=:prior) - check_idata_schema(idata2) - @test ArviZ.groupnames(idata2) == (:prior,) - @test idata2.prior == idata.posterior - end - - @testset "convert_to_inference_data(::$T)" for T in (Array, DimensionalData.DimArray) - data = randn(2, 10, 2) - if T <: DimensionalData.DimArray - data = DimensionalData.DimArray(data, (:a, :b, :c); name=:y) - end - idata = convert_to_inference_data(data) - check_idata_schema(idata) - @test ArviZ.groupnames(idata) == (:posterior,) - posterior = idata.posterior - if T <: DimensionalData.DimArray - @test posterior.y == data - else - @test posterior.x == data - end - idata2 = convert_to_inference_data(data; group=:prior) - check_idata_schema(idata2) - @test ArviZ.groupnames(idata2) == (:prior,) - @test idata2.prior == idata.posterior - end -end - @testset "from_dict" begin posterior = Dict(:A => randn(2, 10, 2), :B => randn(2, 10, 5, 2)) prior = Dict(:C => randn(2, 10, 2), :D => randn(2, 10, 5, 2)) diff --git a/test/test_utils.jl b/test/test_utils.jl index 41a01edd..73a72191 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -43,22 +43,6 @@ pandas = ArviZ.pandas @test ArviZ.frompytype([[x2]]) == [[x]] end - @testset "rekey" begin - orig = (x=3, y=4, z=5) - keymap = (x=:y, y=:a) - @testset "NamedTuple" begin - new = @inferred NamedTuple ArviZ.rekey(orig, keymap) - @test new isa NamedTuple - @test new == (y=3, a=4, z=5) - end - @testset "Dict" begin - orig_dict = Dict(pairs(orig)) - new = @inferred ArviZ.rekey(orig_dict, keymap) - @test new isa typeof(orig_dict) - @test new == Dict(:y => 3, :a => 4, :z => 5) - end - end - @testset "topandas" begin @testset "DataFrames.DataFrame -> 
pandas.DataFrame" begin columns = [:a, :b, :c] @@ -143,9 +127,4 @@ pandas = ArviZ.pandas @test data2_format.y == data2.y @test eltype(data2_format.y) === eltype(data2.y) end - - @testset "package_version" begin - @test ArviZ.package_version(ArviZ) isa VersionNumber - @test ArviZ.package_version(PyCall) isa VersionNumber - end end diff --git a/test/test_xarray.jl b/test/test_xarray.jl new file mode 100644 index 00000000..782abafe --- /dev/null +++ b/test/test_xarray.jl @@ -0,0 +1,68 @@ +using ArviZ, DimensionalData, PyCall, Test + +@testset "xarray interop" begin + @testset "Dataset <-> xarray" begin + nchains = 4 + ndraws = 100 + nshared = 3 + xdims = (:chain, :draw, :shared) + x = DimArray(randn(nchains, ndraws, nshared), xdims) + ydims = (:chain, :draw, Dim{:ydim1}(Any["a", "b"]), :shared) + y = DimArray(randn(nchains, ndraws, 2, nshared), ydims) + metadata = Dict(:prop1 => "val1", :prop2 => "val2") + ds = ArviZ.Dataset((; x, y); metadata) + o = PyObject(ds) + @test o isa PyObject + @test pyisinstance(o, ArviZ.xarray.Dataset) + + @test issetequal(Symbol.(o.coords.keys()), (:chain, :draw, :shared, :ydim1)) + for (dim, coord) in o.coords.items() + @test collect(coord.values) == DimensionalData.index(ds, Symbol(dim)) + end + + variables = Dict(collect(o.data_vars.variables.items())) + @test "x" ∈ keys(variables) + @test x == variables["x"].values + @test variables["x"].dims == String.(xdims) + + @test "y" ∈ keys(variables) + @test y == variables["y"].values + @test variables["y"].dims == ("chain", "draw", "ydim1", "shared") + + # check that the Python object accesses the underlying Julia array + x[1] = 1 + @test x == variables["x"].values + + ds2 = convert(ArviZ.Dataset, o) + @test ds2 isa ArviZ.Dataset + @test ds2.x ≈ ds.x + @test ds2.y ≈ ds.y + dims1 = sort(collect(DimensionalData.dims(ds)); by=DimensionalData.name) + dims2 = sort(collect(DimensionalData.dims(ds2)); by=DimensionalData.name) + for (dim1, dim2) in zip(dims1, dims2) + @test DimensionalData.name(dim1) 
=== DimensionalData.name(dim2) + @test DimensionalData.index(dim1) == DimensionalData.index(dim2) + if DimensionalData.index(dim1) isa AbstractRange + @test DimensionalData.index(dim2) isa AbstractRange + end + end + @test DimensionalData.metadata(ds2) == DimensionalData.metadata(ds) + end + + @testset "InferenceData <-> PyObject" begin + idata1 = random_data() + pyidata1 = PyObject(idata1) + @test pyidata1 isa PyObject + @test pyisinstance(pyidata1, ArviZ.arviz.InferenceData) + idata2 = convert(InferenceData, pyidata1) + test_idata_approx_equal(idata2, idata1) + end + + @testset "convert_to_inference_data(obj::PyObject)" begin + data = Dict(:z => randn(4, 100, 10)) + idata1 = convert_to_inference_data(data) + idata2 = convert_to_inference_data(PyObject(data)) + @test idata2 isa InferenceData + @test idata2.posterior.z ≈ collect(idata1.posterior.z) + end +end