From 974c5793d0ce88d63e1322151e8eb5252b0fabda Mon Sep 17 00:00:00 2001
From: Oskar Laverny <oskar.laverny@gmail.com>
Date: Sat, 23 Mar 2024 07:36:46 +0100
Subject: [PATCH] Add new `subsetdims` method (#187)

Fixes #186
---
 src/Copulas.jl             |  8 +++---
 src/SklarDist.jl           |  3 +-
 src/SubsetCopula.jl        | 56 ++++++++++++++++++++++++++++++++++++++
 test/margins_uniformity.jl |  1 +
 test/subsetcopula.jl       | 47 ++++++++++++++++++++++++++++++++
 5 files changed, 110 insertions(+), 5 deletions(-)
 create mode 100644 src/SubsetCopula.jl
 create mode 100644 test/subsetcopula.jl

diff --git a/src/Copulas.jl b/src/Copulas.jl
index e3e6b734..ea479c82 100644
--- a/src/Copulas.jl
+++ b/src/Copulas.jl
@@ -20,8 +20,7 @@ module Copulas
     include("utils.jl")
     include("Copula.jl")
     include("SklarDist.jl")
-    export pseudos, 
-           EmpiricalCopula, 
+    export pseudos,
            SklarDist
 
     # Others. 
@@ -80,9 +79,10 @@ module Copulas
            InvGaussianCopula,
            JoeCopula
 
+    # Subsetting
+    include("SubsetCopula.jl") # not exported yet. 
 
-
-       using PrecompileTools
+    using PrecompileTools
     @setup_workload begin
     # Putting some things in `@setup_workload` instead of `@compile_workload` can reduce the size of the
     # precompile file and potentially make loading faster.
diff --git a/src/SklarDist.jl b/src/SklarDist.jl
index 36c70647..1cd85e89 100644
--- a/src/SklarDist.jl
+++ b/src/SklarDist.jl
@@ -44,7 +44,8 @@ struct SklarDist{CT,TplMargins} <: Distributions.ContinuousMultivariateDistribut
         d = length(C)
         @assert length(m) == d
         @assert all(mᵢ isa Distributions.UnivariateDistribution for mᵢ in m)
-        return new{typeof(C),typeof(m)}(C,m)
+        tm = Tuple(m)
+        return new{typeof(C),typeof(tm)}(C,tm)
     end    
 end
 Base.length(S::SklarDist{CT,TplMargins}) where {CT,TplMargins} = length(S.C)
diff --git a/src/SubsetCopula.jl b/src/SubsetCopula.jl
new file mode 100644
index 00000000..335787ec
--- /dev/null
+++ b/src/SubsetCopula.jl
@@ -0,0 +1,56 @@
+"""
+    SubsetCopula{d,CT}
+
+Fields:
+  - `C::CT` - The copula
+  - `dims::Tuple{Int64}` - a Tuple representing which dimensions are used. 
+
+Constructor
+
+    SubsetCopula(C::Copula,dims)
+
+This class allows to construct a random vector specified as X[dims] where X is the random vector correspnding to C and dims is a selection of dimensions. 
+"""
+struct SubsetCopula{d,CT} <: Copula{d}
+    C::CT
+    dims::NTuple{d,Int64}
+    function SubsetCopula(C::Copula{d},dims) where d
+        # if Tuple(dims) == Tuple(1:d)
+        #     return C
+        # end
+        @assert all(dims .<= d)
+        return new{length(dims), typeof(C)}(C,Tuple(Int.(dims)))
+    end
+end
+Base.eltype(C::SubsetCopula{d,CT}) where {d,CT} = Base.eltype(C.C)
+function Distributions._rand!(rng::Distributions.AbstractRNG, C::SubsetCopula{d,CT}, x::AbstractVector{T}) where {T<:Real, d,CT}
+    u = Random.rand(rng,C.C)
+    x .= (u[i] for i in C.dims)
+    return x
+end
+function _cdf(C::SubsetCopula{d,CT},u) where {d,CT}
+    # Simplyu saturate dimensions that are not choosen.
+    v = ones(length(C.C))
+    for (i,j) in enumerate(C.dims)
+        v[j] = u[i]
+    end 
+    return Distributions.cdf(C.C,v)
+end
+
+# A few specialized constructors: 
+SubsetCopula(C::GaussianCopula,dims) = GaussianCopula(C.Σ[collect(dims),collect(dims)])
+SubsetCopula(C::TCopula{d,df,MT},dims) where {d,df,MT} = TCopula(df, C.Σ[collect(dims),collect(dims)])
+SubsetCopula(C::ArchimedeanCopula{d,TG},dims) where {d,TG} = ArchimedeanCopula(length(dims), C.G) # in particular for the independence this will work. 
+
+# We could add a few more for performance if needed: EmpiricalCopula, others... 
+
+
+
+"""
+    subsetdims(C::Copula,dims)
+    subsetdims(D::SklarDist, dims)
+
+If X is the random vector corresponding to `C` or `D`, this returns the distributions of  C[dims]. Has specialized methods for some copulas. 
+"""
+subsetdims(C::Copula{d},dims) where d = SubsetCopula(C,dims)
+subsetdims(D::SklarDist, dims) = SklarDist(subsetdims(D.C,dims), Tuple(D.m[i] for i in dims))
\ No newline at end of file
diff --git a/test/margins_uniformity.jl b/test/margins_uniformity.jl
index 13b2b0a4..6b36f710 100644
--- a/test/margins_uniformity.jl
+++ b/test/margins_uniformity.jl
@@ -33,6 +33,7 @@
         SurvivalCopula(ClaytonCopula(2,-0.7),(1,2)),
         RafteryCopula(2, 0.2),
         RafteryCopula(3, 0.5),
+        Copulas.SubsetCopula(RafteryCopula(3, 0.5), (2,1)),
         # Others ? Yes probably others too ! 
     )
 
diff --git a/test/subsetcopula.jl b/test/subsetcopula.jl
new file mode 100644
index 00000000..eaebefd5
--- /dev/null
+++ b/test/subsetcopula.jl
@@ -0,0 +1,47 @@
+@testitem "Check that subsetcopula works as intended" begin
+    using Copulas, Distributions
+
+    cops = (
+        IndependentCopula(3),
+        AMHCopula(3,0.6),
+        AMHCopula(4,-0.3),
+        ClaytonCopula(2,-0.7),
+        ClaytonCopula(3,-0.1),
+        ClaytonCopula(4,7.),
+        FrankCopula(2,-5.),
+        FrankCopula(3,12.),
+        FrankCopula(4,6.),
+        FrankCopula(4,150.),
+        JoeCopula(3,7.),
+        GumbelCopula(4,7.),
+        GumbelCopula(4,20.),
+        GumbelBarnettCopula(3,0.7),
+        InvGaussianCopula(4,0.05),
+        InvGaussianCopula(3,8.),
+        GaussianCopula([1 0.5; 0.5 1]),
+        TCopula(4, [1 0.5; 0.5 1]),
+        FGMCopula(2,1),
+        MCopula(4),
+        WCopula(2),
+        PlackettCopula(2.0),
+        EmpiricalCopula(randn(2,100),pseudo_values=false),
+        SurvivalCopula(ClaytonCopula(2,-0.7),(1,2)),
+        RafteryCopula(2, 0.2),
+        RafteryCopula(3, 0.5),
+    )
+
+    for C in cops
+        d = length(C)
+        D = SklarDist(C,(LogNormal() for i in 1:d))
+        sC = Copulas.subsetdims(C,(1,2))
+        sD = Copulas.subsetdims(D,(2,1))
+
+        # The following methods have to work: 
+        u = rand(sC,10)
+        v = rand(sD,10)
+        cdf(sC,u)
+        cdf(sD,v)
+
+        @test sD.C == Copulas.subsetdims(C,(2,1)) # check for coherence. 
+    end
+end
\ No newline at end of file