Skip to content

Commit

Permalink
Add Ledoit Wolf shrinkage covariance estimator (#304)
Browse files Browse the repository at this point in the history
  • Loading branch information
norm4nn authored Nov 7, 2024
1 parent 7089254 commit 473060f
Show file tree
Hide file tree
Showing 2 changed files with 349 additions and 0 deletions.
222 changes: 222 additions & 0 deletions lib/scholar/covariance/ledoit_wolf.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
defmodule Scholar.Covariance.LedoitWolf do
@moduledoc """
Ledoit-Wolf is a particular form of shrinkage covariance estimator, where the shrinkage coefficient is computed using O. Ledoit and M. Wolf’s formula.
Ledoit and M. Wolf's formula as
described in "A Well-Conditioned Estimator for Large-Dimensional
Covariance Matrices", Ledoit and Wolf, Journal of Multivariate
Analysis, Volume 88, Issue 2, February 2004, pages 365-411.
"""
import Nx.Defn

@derive {Nx.Container, containers: [:covariance, :shrinkage, :location]}
defstruct [:covariance, :shrinkage, :location]

opts_schema = [
assume_centered: [
default: false,
type: :boolean,
doc: """
If `true`, data will not be centered before computation.
Useful when working with data whose mean is almost, but not exactly
zero.
If `false`, data will be centered before computation.
"""
]
]

@opts_schema NimbleOptions.new!(opts_schema)
@doc """
Estimate the shrunk Ledoit-Wolf covariance matrix.
## Options
#{NimbleOptions.docs(@opts_schema)}
## Return Values
The function returns a struct with the following parameters:
* `:covariance` - Tensor of shape `{num_features, num_features}`. Estimated covariance matrix.
* `:shrinkage` - Coefficient in the convex combination used for the computation of the shrunken estimate. Range is `[0, 1]`.
* `:location` - Tensor of shape `{num_features,}`.
Estimated location, i.e. the estimated mean.
## Examples
iex> key = Nx.Random.key(0)
iex> {x, _new_key} = Nx.Random.multivariate_normal(key, Nx.tensor([0.0, 0.0]), Nx.tensor([[0.4, 0.2], [0.2, 0.8]]), shape: {50}, type: :f32)
iex> model = Scholar.Covariance.LedoitWolf.fit(x)
iex> model.covariance
#Nx.Tensor<
f32[2][2]
[
[0.3557686507701874, 0.17340737581253052],
[0.17340737581253052, 1.0300586223602295]
]
>
iex> model.shrinkage
#Nx.Tensor<
f32
0.15034137666225433
>
iex> model.location
#Nx.Tensor<
f32[2]
[0.17184630036354065, 0.3276958167552948]
>
iex> key = Nx.Random.key(0)
iex> {x, _new_key} = Nx.Random.multivariate_normal(key, Nx.tensor([0.0, 0.0, 0.0]), Nx.tensor([[3.0, 2.0, 1.0], [1.0, 2.0, 3.0], [1.3, 1.0, 2.2]]), shape: {10}, type: :f32)
iex> model = Scholar.Covariance.LedoitWolf.fit(x)
iex> model.covariance
#Nx.Tensor<
f32[3][3]
[
[2.5945029258728027, 1.5078359842300415, 1.1623677015304565],
[1.5078359842300415, 2.106797456741333, 1.1812156438827515],
[1.1623677015304565, 1.1812156438827515, 1.4606266021728516]
]
>
iex> model.shrinkage
#Nx.Tensor<
f32
0.1908363401889801
>
iex> model.location
#Nx.Tensor<
f32[3]
[1.1228725910186768, 0.5419300198554993, 0.8678852319717407]
>
iex> key = Nx.Random.key(0)
iex> {x, _new_key} = Nx.Random.multivariate_normal(key, Nx.tensor([0.0, 0.0, 0.0]), Nx.tensor([[3.0, 2.0, 1.0], [1.0, 2.0, 3.0], [1.3, 1.0, 2.2]]), shape: {10}, type: :f32)
iex> cov = Scholar.Covariance.LedoitWolf.fit(x, assume_centered: true)
iex> cov.covariance
#Nx.Tensor<
f32[3][3]
[
[3.8574986457824707, 2.2048025131225586, 2.1504499912261963],
[2.2048025131225586, 2.4572863578796387, 1.7215262651443481],
[2.1504499912261963, 1.7215262651443481, 2.154898166656494]
]
>
"""

deftransform fit(x, opts \\ []) do
fit_n(x, NimbleOptions.validate!(opts, @opts_schema))
end

defnp fit_n(x, opts) do
{x, location} = center(x, opts)

{covariance, shrinkage} =
ledoit_wolf(x)

%__MODULE__{
covariance: covariance,
shrinkage: shrinkage,
location: location
}
end

defnp center(x, opts) do
x =
case Nx.shape(x) do
{_} -> Nx.new_axis(x, 1)
_ -> x
end

location =
if opts[:assume_centered] do
0
else
Nx.mean(x, axes: [0])
end

{x - location, location}
end

defnp ledoit_wolf(x) do
case Nx.shape(x) do
{_n, 1} ->
{Nx.mean(x ** 2) |> Nx.reshape({1, 1}), 0.0}

_ ->
ledoit_wolf_shrinkage(x)
end
end

defnp empirical_covariance(x) do
n = Nx.axis_size(x, 0)

covariance = Nx.dot(x, [0], x, [0]) / n

case Nx.shape(covariance) do
{} -> Nx.reshape(covariance, {1, 1})
_ -> covariance
end
end

defnp trace(x) do
x
|> Nx.take_diagonal()
|> Nx.sum()
end

defnp ledoit_wolf_shrinkage(x) do
case Nx.shape(x) do
{_, 1} ->
0

{n} ->
Nx.reshape(x, {1, n})
|> ledoit_wolf_shrinkage_complex()

_ ->
ledoit_wolf_shrinkage_complex(x)
end
end

defnp ledoit_wolf_shrinkage_complex(x) do
{num_samples, num_features} = Nx.shape(x)
emp_cov = empirical_covariance(x)

emp_cov_trace = trace(emp_cov)
mu = Nx.sum(emp_cov_trace) / num_features

flatten_delta = Nx.flatten(emp_cov)

indices =
Nx.shape(flatten_delta)
|> Nx.iota()

subtract = Nx.select(Nx.remainder(indices, num_features + 1) == 0, mu, 0)

delta =
(flatten_delta - subtract)
|> Nx.pow(2)
|> Nx.sum()

delta = delta / num_features

x2 = Nx.pow(x, 2)

beta =
(Nx.dot(x2, [0], x2, [0]) / num_samples - emp_cov ** 2)
|> Nx.sum()
|> Nx.divide(num_features * num_samples)

beta = Nx.min(beta, delta)
shrinkage = beta / delta

shrunk_cov = (1.0 - shrinkage) * emp_cov
mask = Nx.iota(Nx.shape(shrunk_cov))
selector = Nx.remainder(mask, num_features + 1) == 0
shrunk_cov = Nx.select(selector, shrunk_cov + shrinkage * mu, shrunk_cov)

{shrunk_cov, shrinkage}
end
end
127 changes: 127 additions & 0 deletions test/scholar/covariance/ledoit_wolf_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
defmodule Scholar.Covariance.LedoitWolfTest do
use Scholar.Case, async: true
alias Scholar.Covariance.LedoitWolf
doctest LedoitWolf

defp key do
Nx.Random.key(1)
end

test "fit test - all default options" do
key = key()

{x, _new_key} =
Nx.Random.multivariate_normal(
key,
Nx.tensor([0.0, 0.0, 0.0]),
Nx.tensor([[3.0, 2.0, 1.0], [1.0, 2.0, 3.0], [1.3, 1.0, 2.2]]),
shape: {10},
type: :f32
)

model = LedoitWolf.fit(x)

assert_all_close(
model.covariance,
Nx.tensor([
[1.439786434173584, -0.0, 0.0],
[-0.0, 1.439786434173584, 0.0],
[0.0, 0.0, 1.439786434173584]
]),
atol: 1.0e-3
)

assert_all_close(model.shrinkage, Nx.tensor(1.0), atol: 1.0e-3)

assert_all_close(
model.location,
Nx.tensor([-1.015519142150879, -0.4495307505130768, 0.06475571542978287]),
atol: 1.0e-3
)
end

test "fit test - :assume_centered is true" do
key = key()

{x, _new_key} =
Nx.Random.multivariate_normal(
key,
Nx.tensor([0.0, 0.0, 0.0]),
Nx.tensor([[3.0, 2.0, 1.0], [1.0, 2.0, 3.0], [1.3, 1.0, 2.2]]),
shape: {10},
type: :f32
)

model = LedoitWolf.fit(x, assume_centered: true)

assert_all_close(
model.covariance,
Nx.tensor([
[1.852303147315979, 0.0, 0.0],
[0.0, 1.852303147315979, 0.0],
[0.0, 0.0, 1.852303147315979]
]),
atol: 1.0e-3
)

assert_all_close(model.shrinkage, Nx.tensor(1.0), atol: 1.0e-3)

assert_all_close(model.location, Nx.tensor([0, 0, 0]), atol: 1.0e-3)
end

test "fit test 2" do
key = key()

{x, _new_key} =
Nx.Random.multivariate_normal(
key,
Nx.tensor([0.0, 0.0]),
Nx.tensor([[2.2, 1.5], [0.7, 1.1]]),
shape: {50},
type: :f32
)

model = LedoitWolf.fit(x)

assert_all_close(
model.covariance,
Nx.tensor([
[1.8378269672393799, 0.27215731143951416],
[0.27215731143951416, 1.2268550395965576]
]),
atol: 1.0e-3
)

assert_all_close(model.shrinkage, Nx.tensor(0.38731059432029724), atol: 1.0e-3)

assert_all_close(model.location, Nx.tensor([0.06882287561893463, 0.13750331103801727]),
atol: 1.0e-3
)
end

test "fit test - 1 dim x" do
key = key()

{x, _new_key} =
Nx.Random.multivariate_normal(key, Nx.tensor([0.0]), Nx.tensor([[0.4]]),
shape: {15},
type: :f32
)

x = Nx.flatten(x)

model = LedoitWolf.fit(x)

assert_all_close(
model.covariance,
Nx.tensor([
[0.5322133302688599]
]),
atol: 1.0e-3
)

assert_all_close(model.shrinkage, Nx.tensor(0.0), atol: 1.0e-3)

assert_all_close(model.location, Nx.tensor([0.060818854719400406]), atol: 1.0e-3)
end
end

0 comments on commit 473060f

Please sign in to comment.