add simulation feature #20

Merged · 1 commit · Aug 17, 2024
1 change: 1 addition & 0 deletions Project.toml
@@ -4,6 +4,7 @@ authors = ["andreramosfc <[email protected]>"]
version = "0.1.2"

[deps]
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
GLMNet = "8d5ece8b-de18-5317-b113-243142960cc6"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
22 changes: 16 additions & 6 deletions README.md
@@ -21,7 +21,7 @@ model_input = output.model_input # Model inputs that were utilized to bu
Create_X = output.Create_X # The function utilized to build the regression matrix.
X = output.X # High Dimension Regression utilized in the estimation.
coefs = output.coefs # High Dimension Regression coefficients estimated in the estimation.
ϵ = output.ϵ # Residuals of the model.
ε = output.ε # Residuals of the model.
fitted = output.fitted # Fit in Sample of the model.
components = output.components # Dictionary containing information about each component of the model. Each component has the keys: "Values" (the value of the component at each timestamp), "Coefs" (the coefficients estimated for each element of the component) and "Indexes" (the indexes of the elements of the component in the high dimension regression "X").
residuals_variances = output.residuals_variances # Dictionary containing the estimated variances for the innovations components (that is the information that can be utilized to initialize the state space model).
@@ -52,7 +52,7 @@ Current features include:

## Quick Examples

### Fitting and forecasting
### Fitting, forecasting and simulating
Quick example of fitting, forecasting and simulating the air passengers time series.

```julia
@@ -65,11 +65,20 @@ log_air_passengers = log.(airp.passengers)
steps_ahead = 30

output = StateSpaceLearning.fit_model(log_air_passengers)
prediction_log = StateSpaceLearning.forecast(output, steps_ahead)
prediction_log = StateSpaceLearning.forecast(output, steps_ahead) # arguments are the output of the fitted model and number of steps ahead the user wants to forecast
prediction = exp.(prediction_log)

plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
plot!(vcat(ones(output.T).*NaN, prediction), lab = "Forcast", w=2, color = "blue")
plot!(vcat(ones(length(log_air_passengers)).*NaN, prediction), lab = "Forecast", w=2, color = "blue")

N_scenarios = 1000
simulation = StateSpaceLearning.simulate(output, steps_ahead, N_scenarios) # arguments are the output of the fitted model, number of steps ahead the user wants to forecast and number of scenario paths

plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
for s in 1:N_scenarios-1
plot!(vcat(ones(length(log_air_passengers)).*NaN, exp.(simulation[:, s])), lab = "", α = 0.1 , color = "red")
end
plot!(vcat(ones(length(log_air_passengers)).*NaN, exp.(simulation[:, N_scenarios])), lab = "Scenario Paths", α = 0.1, color = "red")

```
![quick_example_airp](./docs/assets/quick_example_airp.PNG)
@@ -119,7 +128,7 @@ X = rand(length(log_air_passengers), 10) # Create 10 exogenous features

y = log_air_passengers + X[:, 1:3]*β # add to the log_air_passengers series a contribution from only 3 exogenous features.

output = StateSpaceLearning.fit_model(y; Exogenous_X = X, estimation_input = Dict("α" => 1.0, "information_criteria" => "bic", "ϵ" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true))
output = StateSpaceLearning.fit_model(y; Exogenous_X = X, estimation_input = Dict("α" => 1.0, "information_criteria" => "bic", "ε" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true))

Selected_exogenous = output.components["Exogenous_X"]["Selected"]

@@ -138,12 +147,13 @@ using Plots
airp = CSV.File(StateSpaceLearning.AIR_PASSENGERS) |> DataFrame
log_air_passengers = log.(airp.passengers)

airpassengers = Float64.(airp.passengers)
log_air_passengers[60:72] .= NaN

output = StateSpaceLearning.fit_model(log_air_passengers)

fitted_completed_missing_values = ones(144).*NaN; fitted_completed_missing_values[60:72] = exp.(output.fitted[60:72])
real_removed_valued = ones(144).*NaN; real_removed_valued[60:72] = deepcopy(airpassengers[60:72])
real_removed_valued = ones(144).*NaN; real_removed_valued[60:72] = deepcopy(airp.passengers[60:72])
airpassengers[60:72] .= NaN

plot(airpassengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
22 changes: 16 additions & 6 deletions docs/src/manual.md
@@ -21,7 +21,7 @@ model_input = output.model_input # Model inputs that were utilized to bu
Create_X = output.Create_X # The function utilized to build the regression matrix.
X = output.X # High Dimension Regression utilized in the estimation.
coefs = output.coefs # High Dimension Regression coefficients estimated in the estimation.
ϵ = output.ϵ # Residuals of the model.
ε = output.ε # Residuals of the model.
fitted = output.fitted # Fit in Sample of the model.
components = output.components # Dictionary containing information about each component of the model. Each component has the keys: "Values" (the value of the component at each timestamp), "Coefs" (the coefficients estimated for each element of the component) and "Indexes" (the indexes of the elements of the component in the high dimension regression "X").
residuals_variances = output.residuals_variances # Dictionary containing the estimated variances for the innovations components (that is the information that can be utilized to initialize the state space model).
@@ -52,7 +52,7 @@ Current features include:

## Quick Examples

### Fitting and forecasting
### Fitting, forecasting and simulating
Quick example of fitting, forecasting and simulating the air passengers time series.

```julia
@@ -65,11 +65,20 @@ log_air_passengers = log.(airp.passengers)
steps_ahead = 30

output = StateSpaceLearning.fit_model(log_air_passengers)
prediction_log = StateSpaceLearning.forecast(output, steps_ahead)
prediction_log = StateSpaceLearning.forecast(output, steps_ahead) # arguments are the output of the fitted model and number of steps ahead the user wants to forecast
prediction = exp.(prediction_log)

plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
plot!(vcat(ones(output.T).*NaN, prediction), lab = "Forcast", w=2, color = "blue")
plot!(vcat(ones(length(log_air_passengers)).*NaN, prediction), lab = "Forecast", w=2, color = "blue")

N_scenarios = 1000
simulation = StateSpaceLearning.simulate(output, steps_ahead, N_scenarios) # arguments are the output of the fitted model, number of steps ahead the user wants to forecast and number of scenario paths

plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
for s in 1:N_scenarios-1
plot!(vcat(ones(length(log_air_passengers)).*NaN, exp.(simulation[:, s])), lab = "", α = 0.1 , color = "red")
end
plot!(vcat(ones(length(log_air_passengers)).*NaN, exp.(simulation[:, N_scenarios])), lab = "Scenario Paths", α = 0.1, color = "red")

```
![quick_example_airp](./docs/assets/quick_example_airp.PNG)
@@ -119,7 +128,7 @@ X = rand(length(log_air_passengers), 10) # Create 10 exogenous features

y = log_air_passengers + X[:, 1:3]*β # add to the log_air_passengers series a contribution from only 3 exogenous features.

output = StateSpaceLearning.fit_model(y; Exogenous_X = X, estimation_input = Dict("α" => 1.0, "information_criteria" => "bic", "ϵ" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true))
output = StateSpaceLearning.fit_model(y; Exogenous_X = X, estimation_input = Dict("α" => 1.0, "information_criteria" => "bic", "ε" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true))

Selected_exogenous = output.components["Exogenous_X"]["Selected"]

@@ -138,12 +147,13 @@ using Plots
airp = CSV.File(StateSpaceLearning.AIR_PASSENGERS) |> DataFrame
log_air_passengers = log.(airp.passengers)

airpassengers = Float64.(airp.passengers)
log_air_passengers[60:72] .= NaN

output = StateSpaceLearning.fit_model(log_air_passengers)

fitted_completed_missing_values = ones(144).*NaN; fitted_completed_missing_values[60:72] = exp.(output.fitted[60:72])
real_removed_valued = ones(144).*NaN; real_removed_valued[60:72] = deepcopy(airpassengers[60:72])
real_removed_valued = ones(144).*NaN; real_removed_valued[60:72] = deepcopy(airp.passengers[60:72])
airpassengers[60:72] .= NaN

plot(airpassengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
67 changes: 66 additions & 1 deletion src/StateSpaceLearning.jl
@@ -1,6 +1,6 @@
module StateSpaceLearning

using LinearAlgebra, Statistics, GLMNet
using LinearAlgebra, Statistics, GLMNet, Distributions

include("structs.jl")
include("models/default_model.jl")
@@ -106,4 +106,69 @@ function forecast(output::Output, steps_ahead::Int64; Exogenous_Forecast::Matrix
return complete_matrix[end-steps_ahead+1:end, :]*output.coefs
end

"""
simulate(output::Output, steps_ahead::Int64, N_scenarios::Int64; simulate_outliers::Bool = true, Exogenous_Forecast::Matrix{Fl} = zeros(steps_ahead, 0))::Matrix{Float64} where Fl

Generate simulations for a given number of steps ahead using the provided StateSpaceLearning output and exogenous forecast data.

# Arguments
- `output::Output`: Output object obtained from model fitting.
- `steps_ahead::Int64`: Number of steps ahead for simulation.
- `N_scenarios::Int64`: Number of scenario paths to simulate.
- `simulate_outliers::Bool`: If true, simulate outliers (default: true).
- `Exogenous_Forecast::Matrix{Fl}`: Exogenous variables forecast (default: zeros(steps_ahead, 0))

# Returns
- `Matrix{Float64}`: Matrix containing simulated values.
"""
function simulate(output::Output, steps_ahead::Int64, N_scenarios::Int64; simulate_outliers::Bool = true,
innovation_functions::Dict = Dict("stochastic_level" => Dict("create_X" => create_ξ, "component" => "ξ", "args" => (length(output.ε) + steps_ahead + 1, 0)),
"stochastic_trend" => Dict("create_X" => create_ζ, "component" => "ζ", "args" => (length(output.ε) + steps_ahead + 1, 0, 1)),
"stochastic_seasonal" => Dict("create_X" => create_ω, "component" => "ω", "args" => (length(output.ε) + steps_ahead + 1, output.model_input["freq_seasonal"], 0, 1))),
Exogenous_Forecast::Matrix{Fl}=zeros(steps_ahead, 0))::Matrix{Float64} where Fl

prediction = forecast(output, steps_ahead; Exogenous_Forecast = Exogenous_Forecast)

T = length(output.ε)
simulation_X = zeros(steps_ahead, 0)
components_matrix = zeros(length(output.valid_indexes), 0)
N_components = 1

for innovation in keys(innovation_functions)
if output.model_input[innovation]
innov_dict = innovation_functions[innovation]
simulation_X = hcat(simulation_X, innov_dict["create_X"](innov_dict["args"]...)[end-steps_ahead:end-1, end-steps_ahead+1:end])
comp = fill_innovation_coefs(T, innov_dict["component"], output)
components_matrix = hcat(components_matrix, comp[output.valid_indexes])
N_components += 1
end
end

components_matrix = hcat(components_matrix, output.ε[output.valid_indexes])
simulation_X = hcat(simulation_X, Matrix(1.0 * I, steps_ahead, steps_ahead))
components_matrix += rand(Normal(0, 1), size(components_matrix)) ./ 1e9 # Make sure matrix is positive definite

∑ = cov(components_matrix)
MV_dist = MvNormal(zeros(N_components), ∑)
o_noises = simulate_outliers && output.model_input["outlier"] ? rand(Normal(0, std(output.components["o"]["Coefs"])), steps_ahead, N_scenarios) : zeros(steps_ahead, N_scenarios)

simulation = hcat([prediction for _ in 1:N_scenarios]...)
for s in 1:N_scenarios
sim_coefs = ones(size(simulation_X, 2)) .* NaN

for i in 1:steps_ahead
rand_inovs = rand(MV_dist)

for comp in eachindex(rand_inovs)
sim_coefs[i + (comp - 1) * steps_ahead] = rand_inovs[comp]
end
end

simulation[:, s] += (simulation_X * sim_coefs + o_noises[:, s])
end

return simulation

end

end # module StateSpaceLearning
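A minimal usage sketch of the new `simulate` function, following its signature in the diff above (assumes the package and its bundled `AIR_PASSENGERS` dataset are available, as in the README example; the horizon and scenario count are arbitrary illustration values):

```julia
using CSV, DataFrames, StateSpaceLearning

# Load the bundled air passengers dataset and fit on the log scale.
airp = CSV.File(StateSpaceLearning.AIR_PASSENGERS) |> DataFrame
log_air_passengers = log.(airp.passengers)

output = StateSpaceLearning.fit_model(log_air_passengers)

# Draw 500 scenario paths, 12 steps ahead; disable the optional outlier noise.
scenarios = StateSpaceLearning.simulate(output, 12, 500; simulate_outliers = false)

size(scenarios)  # (steps_ahead, N_scenarios): one column per scenario path
```

Each column is a forecast path equal to the point forecast plus innovations drawn from an `MvNormal` calibrated on the in-sample component residuals.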
16 changes: 8 additions & 8 deletions src/estimation_procedure/default_estimation_procedure.jl
@@ -76,16 +76,16 @@ function get_path_information_criteria(model::GLMNetPath, Lasso_X::Matrix{Tl}, L
method_vec = Vector{Float64}(undef, path_size)
for i in 1:path_size
fit = Lasso_X*model.betas[:, i] .+ model.a0[i]
ϵ = Lasso_y - fit
ε = Lasso_y - fit

method_vec[i] = get_information(T, K[i], ϵ; information_criteria = information_criteria)
method_vec[i] = get_information(T, K[i], ε; information_criteria = information_criteria)
end

best_model_idx = argmin(method_vec)
coefs = intercept ? vcat(model.a0[best_model_idx], model.betas[:, best_model_idx]) : model.betas[:, best_model_idx]
fit = intercept ? hcat(ones(T), Lasso_X)*coefs : Lasso_X*coefs
ϵ = Lasso_y - fit
return coefs, ϵ
ε = Lasso_y - fit
return coefs, ε
end

"""
@@ -157,19 +157,19 @@ function fit_lasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float
end

if hasintercept
coefs, ϵ = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = !rm_average)
coefs, ε = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = !rm_average)
else
coefs, ϵ = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = false)
coefs, ε = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = false)
end
return rm_average ? (vcat(mean_y, coefs), ϵ) : (coefs, ϵ)
return rm_average ? (vcat(mean_y, coefs), ε) : (coefs, ε)

end

"""
fit_adalasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64,
information_criteria::String,
components_indexes::Dict{String, Vector{Int64}},
ϵ::Float64, penalize_exogenous::Bool)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}
ε::Float64, penalize_exogenous::Bool)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}

Fits an Adaptive Lasso (AdaLasso) regression model to the provided data and returns coefficients and residuals.

12 changes: 6 additions & 6 deletions src/information_criteria.jl
@@ -1,26 +1,26 @@
"""
get_information(T::Int64, K::Int64, ϵ::Vector{Float64};
get_information(T::Int64, K::Int64, ε::Vector{Float64};
information_criteria::String = "aic")::Float64

Calculates information criterion value based on the provided parameters and residuals.

# Arguments
- `T::Int64`: Number of observations.
- `K::Int64`: Number of selected predictors.
- `ϵ::Vector{Float64}`: Vector of residuals.
- `ε::Vector{Float64}`: Vector of residuals.
- `information_criteria::String`: Method for hyperparameter selection (default: "aic").

# Returns
- `Float64`: Information criterion value.

"""
function get_information(T::Int64, K::Int64, ϵ::Vector{Float64}; information_criteria::String = "aic")::Float64
function get_information(T::Int64, K::Int64, ε::Vector{Float64}; information_criteria::String = "aic")::Float64
if information_criteria == "bic"
return T*log(var(ϵ)) + K*log(T)
return T*log(var(ε)) + K*log(T)
elseif information_criteria == "aic"
return 2*K + T*log(var(ϵ))
return 2*K + T*log(var(ε))
elseif information_criteria == "aicc"
return 2*K + T*log(var(ϵ)) + ((2*K^2 +2*K)/(T - K - 1))
return 2*K + T*log(var(ε)) + ((2*K^2 +2*K)/(T - K - 1))
end
end
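The three criteria computed by `get_information` above can be checked against a standalone sketch that needs only the stdlib `Statistics` module (the function name and sample residuals are illustrative, not part of the package):

```julia
using Statistics

# Mirrors the formulas in get_information above:
#   BIC  = T*log(var(ε)) + K*log(T)
#   AIC  = 2K + T*log(var(ε))
#   AICc = AIC + (2K² + 2K)/(T - K - 1)
function info_criterion(T::Int, K::Int, ε::Vector{Float64}; criterion::String = "aic")
    if criterion == "bic"
        return T*log(var(ε)) + K*log(T)
    elseif criterion == "aic"
        return 2*K + T*log(var(ε))
    elseif criterion == "aicc"
        return 2*K + T*log(var(ε)) + (2*K^2 + 2*K)/(T - K - 1)
    end
end

ε = [0.1, -0.2, 0.05, 0.15, -0.1]
aic  = info_criterion(5, 2, ε; criterion = "aic")
aicc = info_criterion(5, 2, ε; criterion = "aicc")
aicc ≈ aic + 6.0  # true: with T = 5, K = 2 the small-sample correction is (2·4 + 4)/2 = 6
```

Note that AICc converges to AIC as `T` grows relative to `K`, which is why the correction only matters for short series.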
2 changes: 1 addition & 1 deletion src/models/default_model.jl
@@ -259,7 +259,7 @@ function get_components_indexes(Exogenous_X::Matrix{Fl}, model_input::Dict)::Dic
end

"""
get_variances(ϵ::Vector{Fl}, coefs::Vector{Fl}, components_indexes::Dict{String, Vector{Int64}})::Dict where Fl
get_variances(ε::Vector{Fl}, coefs::Vector{Fl}, components_indexes::Dict{String, Vector{Int64}})::Dict where Fl

Calculates variances for each innovation component and for the residuals.

4 changes: 2 additions & 2 deletions src/structs.jl
@@ -8,7 +8,7 @@
- `Create_X::Function`: Function used to create the StateSpaceLearning Matrix.
- `X::Matrix`: StateSpaceLearning Matrix data used in the model.
- `coefs::Vector`: Coefficients obtained from the model.
- `ϵ::Vector`: Residuals of the model.
- `ε::Vector`: Residuals of the model.
- `fitted::Vector`: Fitted values from the model.
- `components::Dict`: Dictionary containing different components.
- `residuals_variances::Dict`: Dictionary storing variances of residuals for different components.
@@ -24,7 +24,7 @@ mutable struct Output
Create_X::Function
X::Matrix
coefs::Vector
ϵ::Vector
ε::Vector
fitted::Vector
components::Dict
residuals_variances::Dict