Merge pull request #239 from SciML/fm/ql
General codebase improvements
MartinuzziFrancesco authored Jan 8, 2025
2 parents 6b828a4 + 113ba19 commit 9e4711f
Showing 11 changed files with 926 additions and 857 deletions.
11 changes: 4 additions & 7 deletions Project.toml
@@ -6,13 +6,10 @@ version = "0.10.5"
[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
CellularAutomata = "878138dc-5b27-11ea-1a71-cb95d38d6b29"
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
PartialFunctions = "570af359-4316-4cb7-8c74-252c00c2016b"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
WeightInitializers = "d49dbf32-c5c2-4618-8acc-27bb2598ef2d"

@@ -29,19 +26,17 @@ Adapt = "4.1.1"
Aqua = "0.8"
CellularAutomata = "0.0.2"
DifferentialEquations = "7.15.0"
Distances = "0.10"
LIBSVM = "0.8"
LinearAlgebra = "1.10"
MLJLinearModels = "0.9.2, 0.10"
NNlib = "0.9.26"
PartialFunctions = "1.2"
Random = "1.10"
Reexport = "1.2.2"
SafeTestsets = "0.1"
Statistics = "1.10"
StatsBase = "0.34.4"
Test = "1"
WeightInitializers = "1.0.4"
WeightInitializers = "1.0.5"
julia = "1.10"

[extras]
@@ -51,7 +46,9 @@ LIBSVM = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b"
MLJLinearModels = "6ee0df7b-362f-4a72-a706-9e79364fb692"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Aqua", "Test", "SafeTestsets", "Random", "DifferentialEquations", "MLJLinearModels", "LIBSVM"]
test = ["Aqua", "Test", "SafeTestsets", "Random", "DifferentialEquations",
"MLJLinearModels", "LIBSVM", "Statistics"]
127 changes: 7 additions & 120 deletions src/ReservoirComputing.jl
@@ -1,126 +1,16 @@
module ReservoirComputing

using Adapt
using CellularAutomata
using Distances
using LinearAlgebra
using NNlib
using PartialFunctions
using Random
using Adapt: adapt
using CellularAutomata: CellularAutomaton
using LinearAlgebra: eigvals, mul!, I
using NNlib: fast_act, sigmoid
using Random: Random, AbstractRNG
using Reexport: Reexport, @reexport
using Statistics
using StatsBase: sample
using WeightInitializers: DeviceAgnostic, PartialFunction, Utils
@reexport using WeightInitializers

#define global types
abstract type AbstractReservoirComputer end
abstract type AbstractOutputLayer end
abstract type AbstractPrediction end
#should probably move some of these
abstract type AbstractGRUVariant end

#general output layer struct
struct OutputLayer{T, I, S, L} <: AbstractOutputLayer
training_method::T
output_matrix::I
out_size::S
last_value::L
end

#prediction types
"""
Generative(prediction_len)
A prediction strategy that enables models to generate autonomous multi-step
forecasts by recursively feeding their own outputs back as inputs for
subsequent prediction steps.
# Parameters
- `prediction_len::Int`: The number of future steps to predict.
# Description
The `Generative` prediction method allows a model to perform multi-step
forecasting by using its own previous predictions as inputs for future predictions.
This approach is especially useful in time series analysis, where each prediction
depends on the preceding data points.
At each step, the model takes the current input, generates a prediction,
and then incorporates that prediction into the input for the next step.
This recursive process continues until the specified
number of prediction steps (`prediction_len`) is reached.
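# Example
A minimal usage sketch (assuming an `esn` and an `output_layer` already produced
by `ESN` and `train`; see those docstrings):
```julia
output = esn(Generative(100), output_layer)  # recursively forecast 100 steps ahead
```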
"""
struct Generative{T} <: AbstractPrediction
prediction_len::T
end

struct Predictive{I, T} <: AbstractPrediction
prediction_data::I
prediction_len::T
end

"""
Predictive(prediction_data)
A prediction strategy for supervised learning tasks,
where a model predicts labels based on a provided set
of input features (`prediction_data`).
# Parameters
- `prediction_data`: The input data used for prediction, typically structured as a matrix
where each column represents a sample, and each row represents a feature.
# Description
The `Predictive` prediction method is a standard approach
in supervised machine learning tasks. It uses the provided input data
(`prediction_data`) to produce corresponding labels or outputs based
on the learned relationships in the model. Unlike generative prediction,
this method does not recursively feed predictions into the model;
instead, it operates on fixed input data to produce a single batch of predictions.
This method is suitable for tasks like classification,
regression, or other use cases where the input features
and the number of steps are predefined.
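# Example
A minimal usage sketch (assuming a trained `esn` and `output_layer`; `new_data`
is illustrative, with 10 features to match the model's input size):
```julia
new_data = rand(Float32, 10, 50)
output = esn(Predictive(new_data), output_layer)  # one output column per input column
```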
"""
function Predictive(prediction_data)
prediction_len = size(prediction_data, 2)
Predictive(prediction_data, prediction_len)
end

#fallbacks for initializers; eventually to remove once migrated to WeightInitializers.jl
for initializer in (:rand_sparse, :delay_line, :delay_line_backward, :cycle_jumps,
:simple_cycle, :pseudo_svd,
:scaled_rand, :weighted_init, :informed_init, :minimal_init)
@eval begin
function ($initializer)(dims::Integer...; kwargs...)
return $initializer(Utils.default_rng(), Float32, dims...; kwargs...)
end
function ($initializer)(rng::AbstractRNG, dims::Integer...; kwargs...)
return $initializer(rng, Float32, dims...; kwargs...)
end
function ($initializer)(::Type{T}, dims::Integer...; kwargs...) where {T <: Number}
return $initializer(Utils.default_rng(), T, dims...; kwargs...)
end

# Partial application
function ($initializer)(rng::AbstractRNG; kwargs...)
return PartialFunction.Partial{Nothing}($initializer, rng, kwargs)
end
function ($initializer)(::Type{T}; kwargs...) where {T <: Number}
return PartialFunction.Partial{T}($initializer, nothing, kwargs)
end
function ($initializer)(rng::AbstractRNG, ::Type{T}; kwargs...) where {T <: Number}
return PartialFunction.Partial{T}($initializer, rng, kwargs)
end
function ($initializer)(; kwargs...)
return PartialFunction.Partial{Nothing}($initializer, nothing, kwargs)
end
end
end
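# For illustration, the fallbacks above enable call forms such as (sketch; the
# `sparsity` keyword is an assumed example of an initializer kwarg):
#   W    = rand_sparse(100, 100)           # default RNG, Float32 eltype
#   W64  = rand_sparse(Float64, 100, 100)  # explicit eltype
#   init = rand_sparse(; sparsity=0.1)     # partial application returns a callable
#   W    = init(100, 100)                  # stored kwargs applied on call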

#general
include("states.jl")
@@ -130,8 +20,7 @@ include("predict.jl")
include("train/linear_regression.jl")

#esn
include("esn/esn_input_layers.jl")
include("esn/esn_reservoirs.jl")
include("esn/esn_inits.jl")
include("esn/esn_reservoir_drivers.jl")
include("esn/esn.jl")
include("esn/deepesn.jl")
@@ -155,9 +44,7 @@ export scaled_rand, weighted_init, informed_init, minimal_init
export rand_sparse, delay_line, delay_line_backward, cycle_jumps, simple_cycle, pseudo_svd
export RNN, MRNN, GRU, GRUParams, FullyGated, Minimal
export train
export ESN
export HybridESN, KnowledgeModel
export DeepESN
export ESN, HybridESN, KnowledgeModel, DeepESN
export RECA, sample
export RandomMapping, RandomMaps
export Generative, Predictive, OutputLayer
23 changes: 9 additions & 14 deletions src/esn/deepesn.jl
@@ -44,27 +44,22 @@ temporal features.
Default is an RNN model.
- `nla_type`: The type of non-linear activation used in the reservoir.
Default is `NLADefault()`.
- `states_type`: Defines the type of states used in the ESN (e.g., standard states).
Default is `StandardStates()`.
- `washout`: The number of initial timesteps to be discarded in the ESN's training phase.
Default is 0.
- `rng`: Random number generator used for initializing weights. Default is the package's
default random number generator.
- `states_type`: Defines the type of states used in the ESN
(e.g., standard states). Default is `StandardStates()`.
- `washout`: The number of initial timesteps to be discarded
in the ESN's training phase. Default is 0.
- `rng`: Random number generator used for initializing weights.
Default is `Utils.default_rng()`.
- `matrix_type`: The type of matrix used for storing the training data.
Default is inferred from `train_data`.
# Example
```julia
# Prepare your training data
train_data = [your_training_data_here]
train_data = rand(Float32, 3, 100)
# Create a DeepESN with specific parameters
deepESN = DeepESN(train_data, 10, 100; depth=3, washout=100)
# Proceed with training and prediction (pseudocode)
train(deepESN, target_data)
prediction = predict(deepESN, new_data)
deepESN = DeepESN(train_data, 3, 100; depth=3, washout=100)
```
"""
function DeepESN(train_data,
@@ -82,7 +77,7 @@ function DeepESN(train_data,
matrix_type=typeof(train_data))
if states_type isa AbstractPaddedStates
in_size = size(train_data, 1) + 1
train_data = vcat(Adapt.adapt(matrix_type, ones(1, size(train_data, 2))),
train_data = vcat(adapt(matrix_type, ones(1, size(train_data, 2))),
train_data)
end

82 changes: 47 additions & 35 deletions src/esn/esn.jl
@@ -15,33 +15,41 @@ end
"""
ESN(train_data; kwargs...) -> ESN
Creates an Echo State Network (ESN) using specified parameters and training data, suitable for various machine learning tasks.
Creates an Echo State Network (ESN).
# Parameters
# Arguments
- `train_data`: Matrix of training data (columns as time steps, rows as features).
- `train_data`: Matrix of training data `num_features x time_steps`.
- `variation`: Variation of ESN (default: `Default()`).
- `input_layer`: Input layer of ESN.
- `reservoir`: Reservoir of the ESN.
- `bias`: Bias vector for each time step.
- `rng`: Random number generator used for initializing weights.
Default is `Utils.default_rng()`.
- `reservoir_driver`: Mechanism for evolving reservoir states (default: `RNN()`).
- `nla_type`: Non-linear activation type (default: `NLADefault()`).
- `states_type`: Format for storing states (default: `StandardStates()`).
- `washout`: Initial time steps to discard (default: `0`).
- `matrix_type`: Type of matrices used internally (default: type of `train_data`).
# Returns
- An initialized ESN instance with specified parameters.
# Examples
```julia
using ReservoirComputing
train_data = rand(10, 100) # 10 features, 100 time steps
esn = ESN(train_data; reservoir=RandSparseReservoir(200), washout=10)
```
```jldoctest
julia> train_data = rand(Float32, 10, 100) # 10 features, 100 time steps
10×100 Matrix{Float32}:
0.567676 0.154756 0.584611 0.294015 … 0.573946 0.894333 0.429133
0.327073 0.729521 0.804667 0.263944 0.559342 0.020167 0.897862
0.453606 0.800058 0.568311 0.749441 0.0713146 0.464795 0.532854
0.0173253 0.536959 0.722116 0.910328 0.00224048 0.00202501 0.631075
0.366744 0.119761 0.100593 0.125122 0.700562 0.675474 0.102947
0.539737 0.768351 0.54681 0.648672 … 0.256738 0.223784 0.94327
0.558099 0.42676 0.1948 0.735625 0.0989234 0.119342 0.624182
0.0603135 0.929999 0.263439 0.0372732 0.066125 0.332769 0.25562
0.4463 0.334423 0.444679 0.311695 0.0494497 0.27171 0.214925
0.987182 0.898593 0.295241 0.233098 0.789699 0.453692 0.759205
julia> esn = ESN(train_data, 10, 300; washout=10)
ESN(10 => 300)
```
"""
function ESN(train_data,
@@ -58,7 +66,7 @@ function ESN(train_data,
matrix_type=typeof(train_data))
if states_type isa AbstractPaddedStates
in_size = size(train_data, 1) + 1
train_data = vcat(Adapt.adapt(matrix_type, ones(1, size(train_data, 2))),
train_data = vcat(adapt(matrix_type, ones(1, size(train_data, 2))),
train_data)
end

@@ -86,6 +94,10 @@ function (esn::AbstractEchoStateNetwork)(prediction::AbstractPrediction,
kwargs...)
end

function Base.show(io::IO, esn::ESN)
print(io, "ESN(", size(esn.train_data, 1), " => ", size(esn.reservoir_matrix, 1), ")")
end

#training dispatch on esn
"""
train(esn::AbstractEchoStateNetwork, target_data, training_method = StandardRidge(0.0))
@@ -98,27 +110,27 @@ Trains an Echo State Network (ESN) using the provided target data and a specified
- `target_data`: Supervised training data for the ESN.
- `training_method`: The method for training the ESN (default: `StandardRidge(0.0)`).
# Returns
- The trained ESN model. Its type and structure depend on `training_method` and the ESN's implementation.
# Returns
The trained ESN model. The exact type and structure of the return value depend on the
`training_method` and the specific ESN implementation.
```julia
using ReservoirComputing
# Initialize an ESN instance and target data
esn = ESN(train_data; reservoir=RandSparseReservoir(200), washout=10)
target_data = rand(size(train_data, 2))
# Train the ESN using the default training method
trained_esn = train(esn, target_data)
# Train the ESN using a custom training method
trained_esn = train(esn, target_data; training_method=StandardRidge(1.0))
```
# Example
```jldoctest
julia> train_data = rand(Float32, 10, 100) # 10 features, 100 time steps
10×100 Matrix{Float32}:
0.11437 0.425367 0.585867 0.34078 … 0.0531493 0.761425 0.883164
0.301373 0.497806 0.279603 0.802417 0.49873 0.270156 0.333333
0.135224 0.660179 0.394233 0.512753 0.901221 0.784377 0.687691
0.510203 0.877234 0.614245 0.978405 0.332775 0.768826 0.527077
0.955027 0.398322 0.312156 0.981938 0.473357 0.156704 0.476101
0.353024 0.997632 0.164328 0.470783 … 0.745613 0.85797 0.465201
0.966044 0.194299 0.599167 0.040475 0.0996013 0.325959 0.770103
0.292068 0.495138 0.481299 0.214566 0.819573 0.155951 0.227168
0.133498 0.451058 0.0761995 0.90421 0.994212 0.332164 0.545112
0.214467 0.791524 0.124105 0.951805 0.947166 0.954244 0.889733
julia> esn = ESN(train_data, 10, 300; washout=10)
ESN(10 => 300)
julia> output_layer = train(esn, rand(Float32, 3, 90))
OutputLayer successfully trained with output size: 3
```
"""
function train(esn::AbstractEchoStateNetwork,
