From 2d964054f474f0051aa99d86dfc1ed08a097c6ed Mon Sep 17 00:00:00 2001 From: MartinuzziFrancesco Date: Sun, 21 Jan 2024 18:08:04 +0100 Subject: [PATCH] HybridESN working, modified docs and readme to follow changes --- README.md | 3 ++ docs/src/esn_tutorials/hybrid.md | 19 +++++--- docs/src/esn_tutorials/lorenz_basic.md | 14 +++--- src/ReservoirComputing.jl | 3 +- src/esn/esn.jl | 6 +-- src/esn/esn_input_layers.jl | 36 --------------- src/esn/esn_predict.jl | 10 ++--- src/esn/hybridesn.jl | 62 +++++++++++++++++++++----- 8 files changed, 84 insertions(+), 69 deletions(-) diff --git a/README.md b/README.md index 60f12963..2b123d66 100644 --- a/README.md +++ b/README.md @@ -104,3 +104,6 @@ If you use this library in your work, please cite: url = {http://jmlr.org/papers/v23/22-0611.html} } ``` +## Acknowledgements + +This project was possible thanks to initial funding through the [Google summer of code](https://summerofcode.withgoogle.com/) 2020 program. Francesco M. further acknowledges [ScaDS.AI](https://scads.ai/) and [RSC4Earth](https://rsc4earth.de/) for supporting the current progress on the library. diff --git a/docs/src/esn_tutorials/hybrid.md b/docs/src/esn_tutorials/hybrid.md index bf274f01..5682e9db 100644 --- a/docs/src/esn_tutorials/hybrid.md +++ b/docs/src/esn_tutorials/hybrid.md @@ -1,6 +1,6 @@ # Hybrid Echo State Networks -Following the idea of giving physical information to machine learning models, the hybrid echo state networks [^1] try to achieve this results by feeding model data into the ESN. In this example, it is explained how to create and leverage such models in ReservoirComputing.jl. The full script for this example is available [here](https://github.com/MartinuzziFrancesco/reservoir-computing-examples/blob/main/hybrid/hybrid.jl). This example was run on Julia v1.7.2. +Following the idea of giving physical information to machine learning models, the hybrid echo state networks [^1] try to achieve this results by feeding model data into the ESN. In this example, it is explained how to create and leverage such models in ReservoirComputing.jl. ## Generating the data @@ -47,17 +47,22 @@ function prior_model_data_generator(u0, tspan, tsteps, model = lorenz) end ``` -Given the initial condition, time span, and time steps, this function returns the data for the chosen model. Now, using the `Hybrid` method, it is possible to input all this information to the model. +Given the initial condition, time span, and time steps, this function returns the data for the chosen model. Now, using the `KnowledgeModel` method, it is possible to input all this information to `HybridESN`. ```@example hybrid using ReservoirComputing, Random Random.seed!(42) -hybrid = Hybrid(prior_model_data_generator, u0, tspan_train, train_len) +km = KnowledgeModel(prior_model_data_generator, u0, tspan_train, train_len) -esn = ESN(input_data, - reservoir = RandSparseReservoir(300), - variation = hybrid) +in_size = 3 +res_size = 300 +hesn = HybridESN( + km, + input_data, + in_size, + res_size; + reservoir = rand_sparse) ``` ## Training and Prediction @@ -65,7 +70,7 @@ esn = ESN(input_data, The training and prediction of the Hybrid ESN can proceed as usual: ```@example hybrid -output_layer = train(esn, target_data, StandardRidge(0.3)) +output_layer = train(hesn, target_data, StandardRidge(0.3)) output = esn(Generative(predict_len), output_layer) ``` diff --git a/docs/src/esn_tutorials/lorenz_basic.md b/docs/src/esn_tutorials/lorenz_basic.md index f820a36d..364c2c4b 100644 --- a/docs/src/esn_tutorials/lorenz_basic.md +++ b/docs/src/esn_tutorials/lorenz_basic.md @@ -1,6 +1,6 @@ # Lorenz System Forecasting -This example expands on the readme Lorenz system forecasting to better showcase how to use methods and functions provided in the library for Echo State Networks. Here the prediction method used is `Generative`, for a more detailed explanation of the differences between `Generative` and `Predictive` please refer to the other examples given in the documentation. The full script for this example is available [here](https://github.com/MartinuzziFrancesco/reservoir-computing-examples/blob/main/lorenz_basic/lorenz_basic.jl). This example was run on Julia v1.7.2. +This example expands on the readme Lorenz system forecasting to better showcase how to use methods and functions provided in the library for Echo State Networks. Here the prediction method used is `Generative`, for a more detailed explanation of the differences between `Generative` and `Predictive` please refer to the other examples given in the documentation. ## Generating the data @@ -46,15 +46,15 @@ using ReservoirComputing #define ESN parameters res_size = 300 +in_size = 3 res_radius = 1.2 res_sparsity = 6 / 300 input_scaling = 0.1 #build ESN struct -esn = ESN(input_data; - variation = Default(), - reservoir = RandSparseReservoir(res_size, radius = res_radius, sparsity = res_sparsity), - input_layer = WeightedLayer(scaling = input_scaling), +esn = ESN(input_data, in_size, res_size; + reservoir = rand_sparse(;radius = res_radius, sparsity = res_sparsity), + input_layer = weighted_init(;scaling = input_scaling), reservoir_driver = RNN(), nla_type = NLADefault(), states_type = StandardStates()) @@ -62,9 +62,9 @@ esn = ESN(input_data; Most of the parameters chosen here mirror the default ones, so a direct call is not necessary. The readme example is identical to this one, except for the explicit call. Going line by line to see what is happening, starting from `res_size`: this value determines the dimensions of the reservoir matrix. In this case, a size of 300 has been chosen, so the reservoir matrix will be 300 x 300. This is not always the case, since some input layer constructions can modify the dimensions of the reservoir, but in that case, everything is taken care of internally. -The `res_radius` determines the scaling of the spectral radius of the reservoir matrix; a proper scaling is necessary to assure the Echo State Property. The default value in the `RandSparseReservoir()` method is 1.0 in accordance with the most commonly followed guidelines found in the literature (see [^2] and references therein). The `sparsity` of the reservoir matrix in this case is obtained by choosing a degree of connections and dividing that by the reservoir size. Of course, it is also possible to simply choose any value between 0.0 and 1.0 to test behaviors for different sparsity values. In this example, the call to the parameters inside `RandSparseReservoir()` was done explicitly to showcase the meaning of each of them, but it is also possible to simply pass the values directly, like so `RandSparseReservoir(1.2, 6/300)`. +The `res_radius` determines the scaling of the spectral radius of the reservoir matrix; a proper scaling is necessary to assure the Echo State Property. The default value in the `rand_sparse` method is 1.0 in accordance with the most commonly followed guidelines found in the literature (see [^2] and references therein). The `sparsity` of the reservoir matrix in this case is obtained by choosing a degree of connections and dividing that by the reservoir size. Of course, it is also possible to simply choose any value between 0.0 and 1.0 to test behaviors for different sparsity values. -The value of `input_scaling` determines the upper and lower bounds of the uniform distribution of the weights in the `WeightedLayer()`. Like before, this value can be passed either as an argument or as a keyword argument `WeightedLayer(0.1)`. The value of 0.1 represents the default. The default input layer is the `DenseLayer`, a fully connected layer. The details of the weighted version can be found in [^3], for this example, this version returns the best results. +The value of `input_scaling` determines the upper and lower bounds of the uniform distribution of the weights in the `weighted_init`. The value of 0.1 represents the default. The default input layer is the `scaled_rand`, a dense matrix. The details of the weighted version can be found in [^3], for this example, this version returns the best results. The reservoir driver represents the dynamics of the reservoir. In the standard ESN definition, these dynamics are obtained through a Recurrent Neural Network (RNN), and this is reflected by calling the `RNN` driver for the `ESN` struct. This option is set as the default, and unless there is the need to change parameters, it is not needed. The full equation is the following: diff --git a/src/ReservoirComputing.jl b/src/ReservoirComputing.jl index f8668b54..aa38de0c 100644 --- a/src/ReservoirComputing.jl +++ b/src/ReservoirComputing.jl @@ -23,7 +23,8 @@ export scaled_rand, weighted_init export rand_sparse, delay_line export RNN, MRNN, GRU, GRUParams, FullyGated, Minimal export ESN, train -export DeepESN, HybridESN +export HybridESN, KnowledgeModel +export DeepESN export RECA, train export RandomMapping, RandomMaps export Generative, Predictive, OutputLayer diff --git a/src/esn/esn.jl b/src/esn/esn.jl index 3592ed8d..2beb552f 100644 --- a/src/esn/esn.jl +++ b/src/esn/esn.jl @@ -138,6 +138,6 @@ end # x_pad = pad_state!(states_type, x_pad, x_tmp) #end -function pad_esnstate!(variation, states_type, x_pad, x, args...) - x_pad = pad_state!(states_type, x_pad, x) -end +#function pad_esnstate!(variation, states_type, x_pad, x, args...) +# x_pad = pad_state!(states_type, x_pad, x) +#end diff --git a/src/esn/esn_input_layers.jl b/src/esn/esn_input_layers.jl index 32682b46..e79df1d5 100644 --- a/src/esn/esn_input_layers.jl +++ b/src/esn/esn_input_layers.jl @@ -66,39 +66,3 @@ function weighted_init(rng::AbstractRNG, ::Type{T}, dims::Integer...; scaling=T( return layer_matrix end - - -""" - sparse_layer(rng::AbstractRNG, ::Type{T}, dims::Integer...; scaling=T(0.1), sparsity=T(0.1)) where {T <: Number} - -Create and return a sparse layer matrix for use in neural network models. -The matrix will be of size specified by `dims`, with the specified `sparsity` and `scaling`. - -# Arguments -- `rng`: An instance of `AbstractRNG` for random number generation. -- `T`: The data type for the elements of the matrix. -- `dims`: Dimensions of the resulting sparse layer matrix. -- `scaling`: The scaling factor for the sparse layer matrix. Defaults to 0.1. -- `sparsity`: The sparsity level of the sparse layer matrix, controlling the fraction of zero elements. Defaults to 0.1. - -# Returns -A sparse layer matrix. - - -# Example -```julia -rng = Random.default_rng() -input_layer = weighted_init(rng, Float64, (3, 300); scaling=0.2, sparsity=0.1) -``` -""" -function sparse_layer(rng::AbstractRNG,::Type{T}, dims::Integer...; - scaling=T(0.1), sparsity=T(0.1)) where {T <: Number} - - in_size, res_size = dims - layer_matrix = Matrix(sprand(rng, res_size, in_size, sparsity)) - layer_matrix = 2.0 .* (layer_matrix .- 0.5) - replace!(layer_matrix, -1.0 => 0.0) - layer_matrix = scaling .* layer_matrix - - return layer_matrix -end diff --git a/src/esn/esn_predict.jl b/src/esn/esn_predict.jl index 5955c762..1b4cd462 100644 --- a/src/esn/esn_predict.jl +++ b/src/esn/esn_predict.jl @@ -69,13 +69,13 @@ function next_state_prediction!(esn::ESN, x, x_new, out, out_pad, i, tmp_array, end #TODO fixme @MatrinuzziFra -function next_state_prediction!(hesn::HybridESN, x, x_new, out, out_pad, i, tmp_array, args...) +function next_state_prediction!(hesn::HybridESN, x, x_new, out, out_pad, i, tmp_array, model_prediction_data) out_tmp = vcat(out, model_prediction_data[:, i]) - out_pad = pad_state!(esn.states_type, out_pad, out_tmp) - x = next_state!(x, esn.reservoir_driver, x[1:(esn.res_size)], out_pad, - esn.reservoir_matrix, esn.input_matrix, esn.bias_vector, tmp_array) + out_pad = pad_state!(hesn.states_type, out_pad, out_tmp) + x = next_state!(x, hesn.reservoir_driver, x[1:(hesn.res_size)], out_pad, + hesn.reservoir_matrix, hesn.input_matrix, hesn.bias_vector, tmp_array) x_tmp = vcat(x, model_prediction_data[:, i]) - x_new = esn.states_type(esn.nla_type, x_tmp, out_pad) + x_new = hesn.states_type(hesn.nla_type, x_tmp, out_pad) return x, x_new end diff --git a/src/esn/hybridesn.jl b/src/esn/hybridesn.jl index d1cfdac9..f29028fc 100644 --- a/src/esn/hybridesn.jl +++ b/src/esn/hybridesn.jl @@ -49,24 +49,66 @@ function KnowledgeModel(prior_model, u0, tspan, datasize) tsteps = push!(trange, dt + trange[end]) tspan_new = (tspan[1], dt + tspan[2]) model_data = prior_model(u0, tspan_new, tsteps) - return Hybrid(prior_model, u0, tspan, dt, datasize, model_data) + return KnowledgeModel(prior_model, u0, tspan, dt, datasize, model_data) +end + +function HybridESN( + model, + train_data, + in_size::Int, + res_size::Int; + input_layer = scaled_rand, + reservoir = rand_sparse, + bias = zeros64, + reservoir_driver = RNN(), + nla_type = NLADefault(), + states_type = StandardStates(), + washout = 0, + rng = _default_rng(), + T = Float32, + matrix_type = typeof(train_data) +) + + train_data = vcat(train_data, model.model_data[:, 1:(end - 1)]) + + if states_type isa AbstractPaddedStates + in_size = size(train_data, 1) + 1 + train_data = vcat(Adapt.adapt(matrix_type, ones(1, size(train_data, 2))), + train_data) + else + in_size = size(train_data, 1) + end + + reservoir_matrix = reservoir(rng, T, res_size, res_size) + #different from ESN, why? + input_matrix = input_layer(rng, T, res_size, in_size) + bias_vector = bias(rng, res_size) + inner_res_driver = reservoir_driver_params(reservoir_driver, res_size, in_size) + states = create_states(inner_res_driver, train_data, washout, reservoir_matrix, + input_matrix, bias_vector) + train_data = train_data[:, (washout + 1):end] + + HybridESN(res_size, train_data, model, nla_type, input_matrix, + inner_res_driver, reservoir_matrix, bias_vector, states_type, washout, + states) end function (hesn::HybridESN)(prediction::AbstractPrediction, output_layer::AbstractOutputLayer; - last_state = esn.states[:, [end]], + last_state = hesn.states[:, [end]], kwargs...) + km = hesn.model pred_len = prediction.prediction_len - model = variation.prior_model - predict_tsteps = [variation.tspan[2] + variation.dt] - [append!(predict_tsteps, predict_tsteps[end] + variation.dt) for i in 1:pred_len] - tspan_new = (variation.tspan[2] + variation.dt, predict_tsteps[end]) - u0 = variation.model_data[:, end] + model = km.prior_model + predict_tsteps = [km.tspan[2] + km.dt] + [append!(predict_tsteps, predict_tsteps[end] + km.dt) for i in 1:pred_len] + tspan_new = (km.tspan[2] + km.dt, predict_tsteps[end]) + u0 = km.model_data[:, end] model_pred_data = model(u0, tspan_new, predict_tsteps)[:, 2:end] - return obtain_esn_prediction(esn, prediction, last_state, output_layer, + return obtain_esn_prediction(hesn, prediction, last_state, output_layer, model_pred_data; kwargs...) end @@ -75,8 +117,8 @@ function train(hesn::HybridESN, target_data, training_method = StandardRidge(0.0)) - states = vcat(esn.states, esn.variation.model_data[:, 2:end]) - states_new = esn.states_type(esn.nla_type, states, esn.train_data[:, 1:end]) + states = vcat(hesn.states, hesn.model.model_data[:, 2:end]) + states_new = hesn.states_type(hesn.nla_type, states, hesn.train_data[:, 1:end]) return _train(states_new, target_data, training_method) end \ No newline at end of file