From e5eeb7ef55cbf4d21b3d7e49fd46a2bacd297758 Mon Sep 17 00:00:00 2001 From: MartinuzziFrancesco Date: Wed, 8 Jan 2025 16:25:43 +0100 Subject: [PATCH] some more work on esn docstrings --- src/esn/deepesn.jl | 21 ++--- src/esn/esn.jl | 70 ++++++++++------- src/esn/esn_reservoir_drivers.jl | 129 +++++++++++++++---------------- src/esn/hybridesn.jl | 74 ++++++------------ src/predict.jl | 21 ++--- 5 files changed, 144 insertions(+), 171 deletions(-) diff --git a/src/esn/deepesn.jl b/src/esn/deepesn.jl index 7ce458fd..4b66975e 100644 --- a/src/esn/deepesn.jl +++ b/src/esn/deepesn.jl @@ -44,27 +44,22 @@ temporal features. Default is an RNN model. - `nla_type`: The type of non-linear activation used in the reservoir. Default is `NLADefault()`. - - `states_type`: Defines the type of states used in the ESN (e.g., standard states). - Default is `StandardStates()`. - - `washout`: The number of initial timesteps to be discarded in the ESN's training phase. - Default is 0. - - `rng`: Random number generator used for initializing weights. Default is the package's - default random number generator. + - `states_type`: Defines the type of states used in the ESN + (e.g., standard states). Default is `StandardStates()`. + - `washout`: The number of initial timesteps to be discarded + in the ESN's training phase. Default is 0. + - `rng`: Random number generator used for initializing weights. + Default is `Utils.default_rng()`. - `matrix_type`: The type of matrix used for storing the training data. Default is inferred from `train_data`. 
# Example ```julia -# Prepare your training data -train_data = [your_training_data_here] +train_data = rand(Float32, 3, 100) # Create a DeepESN with specific parameters -deepESN = DeepESN(train_data, 10, 100; depth=3, washout=100) - -# Proceed with training and prediction (pseudocode) -train(deepESN, target_data) -prediction = predict(deepESN, new_data) +deepESN = DeepESN(train_data, 3, 100; depth=3, washout=10) ``` """ function DeepESN(train_data, diff --git a/src/esn/esn.jl b/src/esn/esn.jl index b585db2a..7c3010d8 100644 --- a/src/esn/esn.jl +++ b/src/esn/esn.jl @@ -15,33 +15,42 @@ end """ ESN(train_data; kwargs...) -> ESN -Creates an Echo State Network (ESN) using specified parameters and training data, suitable for various machine learning tasks. +Creates an Echo State Network (ESN). -# Parameters +# Arguments - - `train_data`: Matrix of training data (columns as time steps, rows as features). + - `train_data`: Matrix of training data `num_features x time_steps`. - `variation`: Variation of ESN (default: `Default()`). - `input_layer`: Input layer of ESN. - `reservoir`: Reservoir of the ESN. - `bias`: Bias vector for each time step. + - `rng`: Random number generator used for initializing weights. + Default is `Utils.default_rng()`. - `reservoir_driver`: Mechanism for evolving reservoir states (default: `RNN()`). - `nla_type`: Non-linear activation type (default: `NLADefault()`). - `states_type`: Format for storing states (default: `StandardStates()`). - `washout`: Initial time steps to discard (default: `0`). - `matrix_type`: Type of matrices used internally (default: type of `train_data`). -# Returns - - - An initialized ESN instance with specified parameters. 
- # Examples -```julia -using ReservoirComputing - -train_data = rand(10, 100) # 10 features, 100 time steps +```julia-repl +julia> train_data = rand(Float32, 10, 100) # 10 features, 100 time steps +10×100 Matrix{Float32}: + 0.567676 0.154756 0.584611 0.294015 … 0.573946 0.894333 0.429133 + 0.327073 0.729521 0.804667 0.263944 0.559342 0.020167 0.897862 + 0.453606 0.800058 0.568311 0.749441 0.0713146 0.464795 0.532854 + 0.0173253 0.536959 0.722116 0.910328 0.00224048 0.00202501 0.631075 + 0.366744 0.119761 0.100593 0.125122 0.700562 0.675474 0.102947 + 0.539737 0.768351 0.54681 0.648672 … 0.256738 0.223784 0.94327 + 0.558099 0.42676 0.1948 0.735625 0.0989234 0.119342 0.624182 + 0.0603135 0.929999 0.263439 0.0372732 0.066125 0.332769 0.25562 + 0.4463 0.334423 0.444679 0.311695 0.0494497 0.27171 0.214925 + 0.987182 0.898593 0.295241 0.233098 0.789699 0.453692 0.759205 + +julia> esn = ESN(train_data, 10, 300; washout=10) +ESN(10 => 300) -esn = ESN(train_data; reservoir=RandSparseReservoir(200), washout=10) ``` """ function ESN(train_data, @@ -86,6 +95,9 @@ function (esn::AbstractEchoStateNetwork)(prediction::AbstractPrediction, kwargs...) end +Base.show(io::IO, esn::ESN) = + print(io, "ESN(", size(esn.train_data, 1), " => ", size(esn.reservoir_matrix, 1), ")") + #training dispatch on esn """ train(esn::AbstractEchoStateNetwork, target_data, training_method = StandardRidge(0.0)) @@ -98,27 +110,29 @@ Trains an Echo State Network (ESN) using the provided target data and a specifie - `target_data`: Supervised training data for the ESN. - `training_method`: The method for training the ESN (default: `StandardRidge(0.0)`). -# Returns - - - The trained ESN model. Its type and structure depend on `training_method` and the ESN's implementation. - -# Returns -The trained ESN model. The exact type and structure of the return value depends on the -`training_method` and the specific ESN implementation. 
+# Example -```julia -using ReservoirComputing +```julia-repl +julia> train_data = rand(Float32, 10, 100) # 10 features, 100 time steps +10×100 Matrix{Float32}: + 0.11437 0.425367 0.585867 0.34078 … 0.0531493 0.761425 0.883164 + 0.301373 0.497806 0.279603 0.802417 0.49873 0.270156 0.333333 + 0.135224 0.660179 0.394233 0.512753 0.901221 0.784377 0.687691 + 0.510203 0.877234 0.614245 0.978405 0.332775 0.768826 0.527077 + 0.955027 0.398322 0.312156 0.981938 0.473357 0.156704 0.476101 + 0.353024 0.997632 0.164328 0.470783 … 0.745613 0.85797 0.465201 + 0.966044 0.194299 0.599167 0.040475 0.0996013 0.325959 0.770103 + 0.292068 0.495138 0.481299 0.214566 0.819573 0.155951 0.227168 + 0.133498 0.451058 0.0761995 0.90421 0.994212 0.332164 0.545112 + 0.214467 0.791524 0.124105 0.951805 0.947166 0.954244 0.889733 -# Initialize an ESN instance and target data -esn = ESN(train_data; reservoir=RandSparseReservoir(200), washout=10) -target_data = rand(size(train_data, 2)) +julia> esn = ESN(train_data, 10, 300; washout=10) +ESN(10 => 300) -# Train the ESN using the default training method -trained_esn = train(esn, target_data) +julia> output_layer = train(esn, rand(Float32, 3, 90)) +OutputLayer successfully trained with output size: 3 -# Train the ESN using a custom training method -trained_esn = train(esn, target_data; training_method=StandardRidge(1.0)) ``` """ function train(esn::AbstractEchoStateNetwork, diff --git a/src/esn/esn_reservoir_drivers.jl b/src/esn/esn_reservoir_drivers.jl index 65f6e624..7601a09d 100644 --- a/src/esn/esn_reservoir_drivers.jl +++ b/src/esn/esn_reservoir_drivers.jl @@ -1,31 +1,26 @@ abstract type AbstractReservoirDriver end """ - create_states( - reservoir_driver::AbstractReservoirDriver, - train_data, - washout, - reservoir_matrix, - input_matrix, - bias_vector - ) + create_states(reservoir_driver::AbstractReservoirDriver, train_data, washout, + reservoir_matrix, input_matrix, bias_vector) -Create and return the trained Echo State Network (ESN) states 
according to the specified reservoir driver. +Create and return the trained Echo State Network (ESN) states according to the +specified reservoir driver. # Arguments - - `reservoir_driver::AbstractReservoirDriver`: The reservoir driver that determines how the ESN states evolve over time. + - `reservoir_driver`: The reservoir driver that determines how the ESN states evolve + over time. - `train_data`: The training data used to train the ESN. - - `washout::Int`: The number of initial time steps to discard during training to allow the reservoir dynamics to wash out the initial conditions. - - `reservoir_matrix`: The reservoir matrix representing the dynamic, recurrent part of the ESN. - - `input_matrix`: The input matrix that defines the connections between input features and reservoir nodes. - - `bias_vector`: The bias vector to be added at each time step during the reservoir update. - -# Returns - - - A matrix of trained ESN states, where each column represents the state at a specific time step. + - `washout`: The number of initial time steps to discard during training to allow the + reservoir dynamics to wash out the initial conditions. + - `reservoir_matrix`: The reservoir matrix representing the dynamic, recurrent part of + the ESN. + - `input_matrix`: The input matrix that defines the connections between input features + and reservoir nodes. + - `bias_vector`: The bias vector to be added at each time step during the reservoir + update. -This function is responsible for creating and returning the states of the ESN during training based on the provided training data and parameters. """ function create_states(reservoir_driver::AbstractReservoirDriver, train_data, @@ -99,7 +94,8 @@ end RNN(activation_function, leaky_coefficient) RNN(;activation_function=tanh, leaky_coefficient=1.0) -Returns a Recurrent Neural Network (RNN) initializer for the Echo State Network (ESN). +Returns a Recurrent Neural Network (RNN) initializer for +echo state networks (`ESN`). 
# Arguments @@ -108,11 +104,12 @@ Returns a Recurrent Neural Network (RNN) initializer for the Echo State Network # Keyword Arguments - - `activation_function`: The activation function used in the RNN. Defaults to `tanh`. - - `leaky_coefficient`: The leaky coefficient used in the RNN. Defaults to 1.0. + - `activation_function`: The activation function used in the RNN. + Defaults to `tanh_fast`. + - `leaky_coefficient`: The leaky coefficient used in the RNN. + Defaults to 1.0. + -This function creates an RNN object with the specified activation function and leaky coefficient, -which can be used as a reservoir driver in the ESN. """ function RNN(; activation_function=NNlib.fast_act(tanh), leaky_coefficient=1.0) RNN(activation_function, leaky_coefficient) @@ -163,25 +160,33 @@ end MRNN(;activation_function=[tanh, sigmoid], leaky_coefficient=1.0, scaling_factor=fill(leaky_coefficient, length(activation_function))) -Returns a Multiple RNN (MRNN) initializer for the Echo State Network (ESN), introduced in [^lun]. +Returns a Multiple RNN (MRNN) initializer for the Echo State Network (ESN), +introduced in [^Lun2015]. # Arguments - - `activation_function`: A vector of activation functions used in the MRNN. + - `activation_function`: A vector of activation functions used + in the MRNN. - `leaky_coefficient`: The leaky coefficient used in the MRNN. - - `scaling_factor`: A vector of scaling factors for combining activation functions. + - `scaling_factor`: A vector of scaling factors for combining activation + functions. # Keyword Arguments - - `activation_function`: A vector of activation functions used in the MRNN. Defaults to `[tanh, sigmoid]`. - - `leaky_coefficient`: The leaky coefficient used in the MRNN. Defaults to 1.0. - - `scaling_factor`: A vector of scaling factors for combining activation functions. Defaults to an array of the same size as `activation_function` with all elements set to `leaky_coefficient`. 
+ - `activation_function`: A vector of activation functions used in the MRNN. + Defaults to `[tanh, sigmoid]`. + - `leaky_coefficient`: The leaky coefficient used in the MRNN. + Defaults to 1.0. + - `scaling_factor`: A vector of scaling factors for combining activation functions. + Defaults to an array of the same size as `activation_function` with all + elements set to `leaky_coefficient`. -This function creates an MRNN object with the specified activation functions, leaky coefficient, and scaling factors, which can be used as a reservoir driver in the ESN. +This function creates an MRNN object with the specified activation functions, +leaky coefficient, and scaling factors, which can be used as a reservoir driver +in the ESN. -# Reference: -[^lun]: Lun, Shu-Xian, et al. +[^Lun2015]: Lun, Shu-Xian, et al. "_A novel model of leaky integrator echo state network for time-series prediction._" Neurocomputing 159 (2015): 58-66. """ @@ -208,16 +213,6 @@ function next_state!(out, mrnn::MRNN, x, y, W, W_in, b, tmp_array) return out end -#= -function next_state!(out, mrnn::MRNN, x, y, W, W_in, b, tmp_array) - rnn_next_state = (1-mrnn.leaky_coefficient).*x - for i=1:length(mrnn.scaling_factor) - rnn_next_state += mrnn.scaling_factor[i]*mrnn.activation_function[i].((W*x).+(W_in*y).+b) - end - rnn_next_state -end -=# - function allocate_tmp(::MRNN, tmp_type, res_size) return [Adapt.adapt(tmp_type, zeros(res_size, 1)) for i in 1:2] end @@ -236,19 +231,16 @@ end """ FullyGated() -Returns a Fully Gated Recurrent Unit (FullyGated) initializer for the Echo State Network (ESN). +Returns a Fully Gated Recurrent Unit (FullyGated) initializer +for the Echo State Network (ESN). -This function creates a FullyGated object, which can be used as a reservoir driver in the ESN. -The FullyGated variant is described in the literature reference [^cho]. +This is the standard gated recurrent unit [^Cho2014], used as a driver for the +echo state network (`ESN`). 
-# Returns - - `FullyGated`: A FullyGated reservoir driver. - -# Reference - -[^cho]: Cho, Kyunghyun, et al. - "_Learning phrase representations using RNN encoder-decoder for statistical machine translation._" +[^Cho2014]: Cho, Kyunghyun, et al. + "_Learning phrase representations using RNN encoder-decoder + for statistical machine translation._" arXiv preprint arXiv:1406.1078 (2014). """ struct FullyGated <: AbstractGRUVariant end @@ -256,9 +248,10 @@ struct FullyGated <: AbstractGRUVariant end """ Minimal() -Returns a minimal GRU ESN initializer as described in [^Zhou]. +Returns a minimal GRU ESN initializer as described in [^Zhou2016]. -[^Zhou]: Zhou, Guo-Bing, et al. "_Minimal gated unit for recurrent neural networks._" +[^Zhou2016]: Zhou, Guo-Bing, et al. "_Minimal gated unit for recurrent + neural networks._" International Journal of Automation and Computing 13.3 (2016): 226-234. """ struct Minimal <: AbstractGRUVariant end @@ -271,23 +264,25 @@ struct Minimal <: AbstractGRUVariant end bias = fill(DenseLayer(), 2), variant = FullyGated()) -Returns a Gated Recurrent Unit (GRU) reservoir driver for Echo State Networks (ESNs). This driver is based on the GRU architecture [^Cho], which is designed to capture temporal dependencies in data and is commonly used in various machine learning applications. +Returns a Gated Recurrent Unit (GRU) reservoir driver for Echo State Networks (`ESN`). +This driver is based on the GRU architecture [^Cho2014]. # Arguments - - `activation_function`: An array of activation functions for the GRU layers. By default, it uses sigmoid activation functions for the update gate, reset gate, and tanh for the hidden state. - - `inner_layer`: An array of inner layers used in the GRU architecture. By default, it uses two dense layers. - - `reservoir`: An array of reservoir layers. By default, it uses two random sparse reservoirs. - - `bias`: An array of bias layers for the GRU. By default, it uses two dense layers. 
- - `variant`: The GRU variant to use. By default, it uses the "FullyGated" variant. - -# Returns - -A GRUParams object containing the parameters needed for the GRU-based reservoir driver. + - `activation_function`: An array of activation functions for the GRU layers. + By default, it uses sigmoid activation functions for the update gate, reset gate, + and tanh for the hidden state. + - `inner_layer`: An array of inner layers used in the GRU architecture. + By default, it uses two dense layers. + - `reservoir`: An array of reservoir layers. + By default, it uses two random sparse reservoirs. + - `bias`: An array of bias layers for the GRU. + By default, it uses two dense layers. + - `variant`: The GRU variant to use. + By default, it uses the "FullyGated" variant. -# References -[^Cho]: Cho, Kyunghyun, et al. +[^Cho2014]: Cho, Kyunghyun, et al. "_Learning phrase representations using RNN encoder-decoder for statistical machine translation._" arXiv preprint arXiv:1406.1078 (2014). """ diff --git a/src/esn/hybridesn.jl b/src/esn/hybridesn.jl index 13c8b642..5453c814 100644 --- a/src/esn/hybridesn.jl +++ b/src/esn/hybridesn.jl @@ -24,8 +24,8 @@ end """ KnowledgeModel(prior_model, u0, tspan, datasize) -Constructs a `Hybrid` variation of Echo State Networks (ESNs) integrating a knowledge-based model -(`prior_model`) with ESNs for advanced training and prediction in chaotic systems. +Constructs a `Hybrid` variation of Echo State Networks (ESNs) [^Pathak2018] +integrating a knowledge-based model (`prior_model`) with ESNs. # Parameters @@ -34,15 +34,7 @@ Constructs a `Hybrid` variation of Echo State Networks (ESNs) integrating a know - `tspan`: Time span as a tuple, indicating the duration for model operation. - `datasize`: The size of the data to be processed. -# Returns - - - A `Hybrid` struct instance representing the combined ESN and knowledge-based model. - -This method is effective for chaotic processes as highlighted in [^Pathak]. 
- -Reference: - -[^Pathak]: Jaideep Pathak et al. +[^Pathak2018]: Jaideep Pathak et al. "Hybrid Forecasting of Chaotic Processes: Using Machine Learning in Conjunction with a Knowledge-Based Model" (2018). """ @@ -59,11 +51,7 @@ end HybridESN(model, train_data, in_size, res_size; kwargs...) Construct a Hybrid Echo State Network (ESN) model that integrates -traditional Echo State Networks with a predefined knowledge model for -enhanced performance on chaotic systems or complex datasets. This -constructor allows for the creation of a customized ESN architecture by -specifying the reservoir size, input size, and various other parameters that -influence the network's behavior and learning capacity. +traditional Echo State Networks with a predefined knowledge model [^Pathak2018]. # Parameters @@ -79,41 +67,29 @@ influence the network's behavior and learning capacity. # Optional Keyword Arguments - - `input_layer`: A function to initialize the input matrix. Default is `scaled_rand`. - - `reservoir`: A function to initialize the reservoir matrix. Default is `rand_sparse`. - - `bias`: A function to initialize the bias vector. Default is `zeros64`. - - `reservoir_driver`: The driving system for the reservoir. Default is an RNN model. + - `input_layer`: A function to initialize the input matrix. + Default is `scaled_rand`. + - `reservoir`: A function to initialize the reservoir matrix. + Default is `rand_sparse`. + - `bias`: A function to initialize the bias vector. + Default is `zeros32`. + - `reservoir_driver`: The driving system for the reservoir. + Default is an RNN model. - `nla_type`: The type of non-linear activation used in the reservoir. Default is `NLADefault()`. - - `states_type`: Defines the type of states used in the ESN (e.g., standard states). - Default is `StandardStates()`. - - `washout`: The number of initial timesteps to be discarded in the ESN's training phase. - Default is 0. - - `rng`: Random number generator used for initializing weights. 
Default is the package's - default random number generator. - - `T`: The data type for the matrices (e.g., `Float32`). Influences computational - efficiency and precision. - - `matrix_type`: The type of matrix used for storing the training data. Default is - inferred from `train_data`. - -# Returns - - - A `HybridESN` instance configured according to the provided parameters and - suitable for further training and prediction tasks. - -# Example - -```julia -# Define a KnowledgeModel -km = KnowledgeModel(prior_model_function, u0, (0, 100), 1000) - -# Create a HybridESN -hesn = HybridESN(km, train_data, 10, 100; washout=100) - -# Train and predict -train(hesn, target_data) -prediction = hesn(prediction_object, output_layer) -``` + - `states_type`: Defines the type of states used in the + ESN. Default is `StandardStates()`. + - `washout`: The number of initial timesteps to be + discarded in the ESN's training phase. Default is 0. + - `rng`: Random number generator used for initializing weights. + Default is `Utils.default_rng()`. + - `T`: The data type for the matrices (e.g., `Float32`). + - `matrix_type`: The type of matrix used for storing the training data. + Default is inferred from `train_data`. + +[^Pathak2018]: Jaideep Pathak et al. + "Hybrid Forecasting of Chaotic Processes: + Using Machine Learning in Conjunction with a Knowledge-Based Model" (2018). """ function HybridESN(model, train_data, diff --git a/src/predict.jl b/src/predict.jl index 1cfca9af..41a226c8 100644 --- a/src/predict.jl +++ b/src/predict.jl @@ -9,6 +9,9 @@ struct OutputLayer{T, I, S, L} <: AbstractOutputLayer last_value::L end +Base.show(io::IO, ol::OutputLayer) = + print(io, "OutputLayer successfully trained with output size: ", ol.out_size) + #prediction types """ Generative(prediction_len) @@ -19,14 +22,12 @@ subsequent prediction steps. # Parameters - - `prediction_len::Int`: The number of future steps to predict. + - `prediction_len`: The number of future steps to predict. 
# Description The `Generative` prediction method allows a model to perform multi-step forecasting by using its own previous predictions as inputs for future predictions. -This approach is especially useful in time series analysis, where each prediction -depends on the preceding data points. At each step, the model takes the current input, generates a prediction, and then incorporates that prediction into the input for the next step. @@ -51,21 +52,13 @@ of input features (`prediction_data`). # Parameters - - `prediction_data`: The input data used for prediction, typically structured as a matrix - where each column represents a sample, and each row represents a feature. + - `prediction_data`: Input matrix used for prediction, sized `num_features x num_samples`. # Description -The `Predictive` prediction method is a standard approach -in supervised machine learning tasks. It uses the provided input data +The `Predictive` prediction method uses the provided input data (`prediction_data`) to produce corresponding labels or outputs based -on the learned relationships in the model. Unlike generative prediction, -this method does not recursively feed predictions into the model; -instead, it operates on fixed input data to produce a single batch of predictions. - -This method is suitable for tasks like classification, -regression, or other use cases where the input features -and the number of steps are predefined. +on the learned relationships in the model. """ function Predictive(prediction_data) prediction_len = size(prediction_data, 2)