From 03e5b559a28ec117d6ce4babae4279c09b5ced87 Mon Sep 17 00:00:00 2001 From: andre_ramos Date: Sat, 26 Oct 2024 16:14:56 -0300 Subject: [PATCH] separate fit_model and add code format --- .gitignore | 8 +- Project.toml | 2 +- README.md | 86 ++-- docs/make.jl | 31 +- docs/src/adapting_package.md | 93 ---- docs/src/manual.md | 106 ++--- paper_tests/m4_test/evaluate_model.jl | 47 ++- paper_tests/m4_test/m4_test.jl | 38 +- paper_tests/m4_test/metrics.jl | 12 +- paper_tests/m4_test/prepare_data.jl | 12 +- .../simulation_test/evaluate_models.jl | 103 +++-- paper_tests/simulation_test/metrics.jl | 4 +- paper_tests/simulation_test/simulation.jl | 188 +++++---- .../simulation_test/simulation_generator.jl | 67 +-- src/StateSpaceLearning.jl | 169 +------- src/datasets.jl | 2 +- ...n_procedure.jl => estimation_procedure.jl} | 136 +++--- src/fit_forecast.jl | 144 +++++++ src/information_criteria.jl | 11 +- src/models/default_model.jl | 283 ------------- src/models/structural_model.jl | 397 ++++++++++++++++++ src/structs.jl | 19 +- src/utils.jl | 46 +- test/StateSpaceLearning.jl | 65 --- test/estimation_procedure.jl | 197 +++++++++ .../default_estimation_procedure.jl | 127 ------ test/fit_forecast.jl | 81 ++++ test/information_criteria.jl | 16 +- test/models/default_model.jl | 220 ---------- test/models/structural_model.jl | 363 ++++++++++++++++ test/runtests.jl | 6 +- test/utils.jl | 84 +++- 32 files changed, 1794 insertions(+), 1369 deletions(-) delete mode 100644 docs/src/adapting_package.md rename src/{estimation_procedure/default_estimation_procedure.jl => estimation_procedure.jl} (58%) create mode 100644 src/fit_forecast.jl delete mode 100644 src/models/default_model.jl create mode 100644 src/models/structural_model.jl delete mode 100644 test/StateSpaceLearning.jl create mode 100644 test/estimation_procedure.jl delete mode 100644 test/estimation_procedure/default_estimation_procedure.jl create mode 100644 test/fit_forecast.jl delete mode 100644 test/models/default_model.jl create mode 100644 test/models/structural_model.jl diff --git a/.gitignore b/.gitignore index bcee700..f4343d6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,8 @@ .DS_Store -Manifest.toml \ No newline at end of file +Manifest.toml + +local_test/ +paper_tests/m4_test/metrics_results/ +paper_tests/m4_test/results_ARIMA/ +paper_tests/m4_test/results_SSL/ +paper_tests/simulation_test/results_simulation_raw/ diff --git a/Project.toml b/Project.toml index aba0044..4693f10 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StateSpaceLearning" uuid = "971c4b7c-2c4e-4bac-8525-e842df3cde7b" authors = ["andreramosfc "] -version = "0.3.0" +version = "1.0.0" [deps] Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" diff --git a/README.md b/README.md index 3d6fd03..26cb4b9 100644 --- a/README.md +++ b/README.md @@ -13,33 +13,31 @@ using StateSpaceLearning y = randn(100) -#Fit Model -output = StateSpaceLearning.fit_model(y) - -#Main output options -model_input = output.model_input # Model inputs that were utilized to build the regression matrix. -Create_X = output.Create_X # The function utilized to build the regression matrix. -X = output.X # High Dimension Regression utilized in the estimation. -coefs = output.coefs # High Dimension Regression coefficients estimated in the estimation. -ε = output.ε # Residuals of the model. -fitted = output.fitted # Fit in Sample of the model. 
-components = output.components # Dictionary containing information about each component of the model, each component has the keys: "Values" (The value of the component in each timestamp) , "Coefs" (The coefficients estimated for each element of the component) and "Indexes" (The indexes of the elements of the component in the high dimension regression "X").
-residuals_variances = output.residuals_variances # Dictionary containing the estimated variances for the innovations components (that is the information that can be utilized to initialize the state space model).
-valid_indexes = output.valid_indexes # Vector containing valid indexes of the time series (non valid indexes represent NaN values in the time series).
-
-#Forecast
-prediction = StateSpaceLearning.forecast(output, 12) #Gets a 12 steps ahead prediction
+# Instantiate Model
+model = StructuralModel(y)
+
+# Fit Model
+fit!(model)
+
+# Point Forecast
+prediction = StateSpaceLearning.forecast(model, 12) # Gets a 12 steps ahead prediction
+
+# Scenario Paths Simulation
+simulation = StateSpaceLearning.simulate(model, 12, 1000) # Gets 1000 scenario paths of 12 steps ahead predictions
 ```

-## Fit Arguments
+## StructuralModel Arguments

-* `y::Vector{Fl}`: Vector of data.
-* `model_input::Dict`: Dictionary containing the model input parameters (default: Dict("level" => true, "stochastic\_level" => true, "trend" => true, "stochastic\_trend" => true, "seasonal" => true, "stochastic\_seasonal" => true, "freq\_seasonal" => 12, "outlier" => true, , "ζ\_ω\_threshold" => 12)).
-* `model_functions::Dict`: Dictionary containing the model functions (default: Dict("create\_X" => create\_X, "get\_components\_indexes" => get\_components\_indexes, "get\_variances" => get\_variances)).
-* `estimation_input::Dict`: Dictionary containing the estimation input parameters (default: Dict("α" => 0.1, "information\_criteria" => "aic", ψ => 0.05, "penalize\_exogenous" => true, "penalize\_initial\_states" => true)).
-* `estimation_function::Function`: Estimation function (default: default\_estimation\_procedure).
-* `Exogenous_X::Union{Matrix{Fl}, Missing}`: Exogenous variables matrix (default: missing).
+* `y::Vector`: Vector of data.
+* `level::Bool`: Boolean indicating whether to include an intercept (level) in the model (default: true)
+* `stochastic_level::Bool`: Boolean indicating whether to include a stochastic level component in the model (default: true)
+* `trend::Bool`: Boolean indicating whether to include a trend component in the model (default: true)
+* `stochastic_trend::Bool`: Boolean indicating whether to include a stochastic trend component in the model (default: true)
+* `seasonal::Bool`: Boolean indicating whether to include a seasonal component in the model (default: true)
+* `stochastic_seasonal::Bool`: Boolean indicating whether to include a stochastic seasonal component in the model (default: true)
+* `freq_seasonal::Int`: Seasonal frequency to be considered in the model (default: 12)
+* `outlier::Bool`: Boolean indicating whether to include an outlier component in the model (default: true)
+* `ζ_ω_threshold::Int`: Threshold used to stabilize the `stochastic trend` and `stochastic seasonal` components (default: 12)

 ## Features

@@ -66,8 +64,9 @@ airp = CSV.File(StateSpaceLearning.AIR_PASSENGERS) |> DataFrame
 log_air_passengers = log.(airp.passengers)
 steps_ahead = 30

-output = StateSpaceLearning.fit_model(log_air_passengers)
-prediction_log = StateSpaceLearning.forecast(output, steps_ahead) # arguments are the output of the fitted model and number of steps ahead the user wants to forecast
+model = StructuralModel(log_air_passengers)
+fit!(model)
+prediction_log = StateSpaceLearning.forecast(model, steps_ahead) # arguments are the fitted model and number of steps ahead the user wants to forecast
 prediction = exp.(prediction_log)

 plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
@@ -77,7 +76,7 @@ plot!(vcat(ones(length(log_air_passengers)).*NaN, prediction), lab = "Forecast",

 ```julia
 N_scenarios = 1000
-simulation = StateSpaceLearning.simulate(output, steps_ahead, N_scenarios) # arguments are the output of the fitted model, number of steps ahead the user wants to forecast and number of scenario paths
+simulation = StateSpaceLearning.simulate(model, steps_ahead, N_scenarios) # arguments are the fitted model, number of steps ahead the user wants to forecast and number of scenario paths

 plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
 for s in 1:N_scenarios-1
@@ -99,11 +98,12 @@ using Plots

 airp = CSV.File(StateSpaceLearning.AIR_PASSENGERS) |> DataFrame
 log_air_passengers = log.(airp.passengers)

-output = StateSpaceLearning.fit_model(log_air_passengers)
+model = StructuralModel(log_air_passengers)
+fit!(model)

-level = output.components["μ1"]["Values"] + output.components["ξ"]["Values"]
-slope = output.components["ν1"]["Values"] + output.components["ζ"]["Values"]
-seasonal = output.components["γ1"]["Values"] + output.components["ω"]["Values"]
+level = model.output.components["μ1"]["Values"] + model.output.components["ξ"]["Values"]
+slope = model.output.components["ν1"]["Values"] + model.output.components["ζ"]["Values"]
+seasonal = model.output.components["γ1"]["Values"] + model.output.components["ω"]["Values"]
 trend = level + slope

 plot(trend, w=2 , color = "Black", lab = "Trend Component", legend = :outerbottom)
@@ -133,9 +133,10 @@ X = rand(length(log_air_passengers), 10) # Create 10 exogenous features
 y = log_air_passengers + X[:, 1:3]*β # add to the log_air_passengers series a contribution from only 3 exogenous features. 
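+# Note (hypothetical sanity check, not part of the original example): only the
+# first three columns of X carry signal, so the fit below is expected to select
+# exactly features 1:3 in most runs. Once `Selected_exogenous` is computed
+# further below, one could verify the recovered support with:
+# @assert sort(Selected_exogenous) == [1, 2, 3]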
-output = StateSpaceLearning.fit_model(y; Exogenous_X = X, estimation_input = Dict("α" => 1.0, "information_criteria" => "bic", "ε" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true)) +model = StructuralModel(y; Exogenous_X = X) +fit!(model; α = 1.0, information_criteria = "bic", ϵ = 0.05, penalize_exogenous = true, penalize_initial_states = true) -Selected_exogenous = output.components["Exogenous_X"]["Selected"] +Selected_exogenous = model.output.components["Exogenous_X"]["Selected"] ``` @@ -155,9 +156,10 @@ log_air_passengers = log.(airp.passengers) airpassengers = Float64.(airp.passengers) log_air_passengers[60:72] .= NaN -output = StateSpaceLearning.fit_model(log_air_passengers) +model = StructuralModel(log_air_passengers) +fit!(model) -fitted_completed_missing_values = ones(144).*NaN; fitted_completed_missing_values[60:72] = exp.(output.fitted[60:72]) +fitted_completed_missing_values = ones(144).*NaN; fitted_completed_missing_values[60:72] = exp.(model.output.fitted[60:72]) real_removed_valued = ones(144).*NaN; real_removed_valued[60:72] = deepcopy(airp.passengers[60:72]) airpassengers[60:72] .= NaN @@ -183,8 +185,10 @@ log_air_passengers[60] = 10 log_air_passengers[30] = 1 log_air_passengers[100] = 2 -output = StateSpaceLearning.fit_model(log_air_passengers) -detected_outliers = findall(i -> i != 0, output.components["o"]["Coefs"]) +model = StructuralModel(log_air_passengers) +fit!(model) + +detected_outliers = findall(i -> i != 0, model.output.components["o"]["Coefs"]) plot(log_air_passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom) scatter!([detected_outliers], log_air_passengers[detected_outliers], lab = "Detected Outliers") @@ -203,15 +207,17 @@ using StateSpaceModels airp = CSV.File(StateSpaceLearning.AIR_PASSENGERS) |> DataFrame log_air_passengers = log.(airp.passengers) -output = StateSpaceLearning.fit_model(log_air_passengers) -residuals_variances = output.residuals_variances +model = StructuralModel(log_air_passengers) +fit!(model) -model = BasicStructural(log_air_passengers, 12) -set_initial_hyperparameters!(model, Dict("sigma2_ε" => residuals_variances["ε"], +residuals_variances = model.output.residuals_variances + +ss_model = BasicStructural(log_air_passengers, 12) +set_initial_hyperparameters!(ss_model, Dict("sigma2_ε" => residuals_variances["ε"], "sigma2_ξ" =>residuals_variances["ξ"], "sigma2_ζ" =>residuals_variances["ζ"], "sigma2_ω" =>residuals_variances["ω"])) -fit!(model) +StateSpaceModels.fit!(ss_model) ``` ## Paper Results Reproducibility diff --git a/docs/make.jl b/docs/make.jl index 0f98eea..ddf4543 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -3,25 +3,18 @@ using Documenter include("../src/StateSpaceLearning.jl") # Set up to run docstrings with jldoctest -DocMeta.setdocmeta!( - StateSpaceLearning, :DocTestSetup, :(using StateSpaceLearning); recursive=true -) +DocMeta.setdocmeta!(StateSpaceLearning, :DocTestSetup, :(using StateSpaceLearning); + recursive=true) makedocs(; - modules=[StateSpaceLearning], - doctest=true, - clean=true, - checkdocs=:none, - format=Documenter.HTML(mathengine=Documenter.MathJax2()), - sitename="StateSpaceLearning.jl", - authors="André Ramos", - pages=[ - "Home" => "index.md", "manual.md", - "adapting_package.md" - ], -) + modules=[StateSpaceLearning], + doctest=true, + clean=true, + checkdocs=:none, + format=Documenter.HTML(; mathengine=Documenter.MathJax2()), + sitename="StateSpaceLearning.jl", + authors="André Ramos", + pages=["Home" => "index.md", "manual.md"],) -deploydocs( - 
repo="github.com/LAMPSPUC/StateSpaceLearning.jl.git", - push_preview = true - ) +deploydocs(; repo="github.com/LAMPSPUC/StateSpaceLearning.jl.git", + push_preview=true) diff --git a/docs/src/adapting_package.md b/docs/src/adapting_package.md deleted file mode 100644 index 1c63594..0000000 --- a/docs/src/adapting_package.md +++ /dev/null @@ -1,93 +0,0 @@ -# Add New Models - -The StateSpaceLearning framework supports any additive state space formulation. This section illustrates how to utilize the framework for a specific model. - -## Local Level Model - -Although the Local Level Model is already implemented within the scope of unobserved components, we use it here as an example. To incorporate a new model, it is necessary to create a dictionary containing the model inputs and another dictionary containing three functions (create\_X, get\_components\_indexes, and get\_variances). - -### Model Inputs -For the Local Level Model, no parameters are needed. Thus, the model input can be created as: - -```julia -model_input = Dict() -``` - -### create\_X -The create\_X function constructs the matrices in the State Space Learning format. It must accept the following inputs: (model\_input::Dict, Exogenous\_X::Matrix{Fl}, steps\_ahead::Int64=0, Exogenous\_Forecast::Matrix{Fl}). It must return a matrix. - -```julia -function create_X_LocalLevel(model_input::Dict, Exogenous_X::Matrix{Fl}, - steps_ahead::Int64=0, Exogenous_Forecast::Matrix{Fl}=zeros(steps_ahead, size(Exogenous_X, 2))) where Fl - T = size(Exogenous_X, 1) - initial_states_matrix = ones(T+steps_ahead, 1) - ξ_matrix = Matrix{Float64}(undef, T+steps_ahead, T - 1) - for t in 1:T+steps_ahead - ξ_matrix[t, :] = t < T ? vcat(ones(t-1), zeros(T-t)) : ones(T-1) - end - - return hcat(initial_states_matrix, ξ_matrix) -end -``` - -### get\_components\_indexes -The get\_components\_indexes function outputs a dictionary indicating the indexes of each model component, including a set of indexes for all initial states. For the Local Level Model, the only components are the initial state μ1 and its innovations ξ. The function must accept the following inputs: (Exogenous\_X::Matrix{Fl}, model\_input::Dict). It must return a dictionary. - -```julia -function get_components_indexes_LocalLevel(Exogenous_X::Matrix{Fl}, model_input::Dict)::Dict where Fl - T = size(Exogenous_X, 1) - μ1_indexes = [1] - initial_states_indexes = [1] - ξ_indexes = collect(2:T) - return Dict("μ1" => μ1_indexes, "ξ" => ξ_indexes, "initial_states" => initial_states_indexes) -end -``` -### get\_variances -The get\_variances function calculates the variances of the innovations and residuals. It must accept the following inputs:(ε::Vector{Fl}, coefs::Vector{Fl}, components\_indexes::Dict{String, Vector{Int64}}). It must return a dictionary. 
- -```julia -function get_variances_LocalLevel(ε::Vector{Fl}, coefs::Vector{Fl}, components_indexes::Dict{String, Vector{Int64}})::Dict where Fl - - variances = Dict() - variances["ξ"] = var(coefs[components_indexes["ξ"]]) - variances["ε"] = var(ε) - return variances -end -``` - -### Running the new model -To test the new model, run the fit\_model function with the new inputs: - -```julia -using StateSpaceLearning -using Statistics - -y = randn(100) - -#Fit Model -output = StateSpaceLearning.fit_model(y; model_input = model_input, model_functions = Dict("create_X" => create_X_LocalLevel, - "get_components_indexes" => get_components_indexes_LocalLevel, "get_variances" => get_variances_LocalLevel)) -``` - -# Changing Estimation Procedure -The current estimation procedure is based on an Adaptive Lasso. However, alternative methods can be chosen within the StateSpaceLearning framework. Below is an example of how to implement a simple model that minimizes the sum of squares of the residuals. This requires creating two variables: a dictionary estimation\_input (which is empty in this case) and a function estimation\_function with the following arguments:(Estimation_X::Matrix{Tl}, estimation\_y::Vector{Fl}, components\_indexes::Dict{String, Vector{Int64}}, estimation\_input::Dict). The function should return a tuple containing the model coefficients and residuals. - -```julia -estimation_input = Dict() -function estimation_function_min_sq(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, components_indexes::Dict{String, Vector{Int64}}, estimation_input::Dict) where {Tl, Fl} - mq_coefs = Estimation_X \ estimation_y - mq_res = estimation_y - (Estimation_X * mq_coefs) - return mq_coefs, mq_res -end -``` -### Running the model with the new estimation procedure -```julia -using StateSpaceLearning - -y = randn(100) - -#Fit Model -output = StateSpaceLearning.fit_model(y; estimation_input = estimation_input, estimation_function = estimation_function_min_sq) -``` - -By following these steps, you can customize and extend the StateSpaceLearning framework to suit a variety of state space models and estimation procedures. \ No newline at end of file diff --git a/docs/src/manual.md b/docs/src/manual.md index 71beab0..fdc3f52 100644 --- a/docs/src/manual.md +++ b/docs/src/manual.md @@ -9,33 +9,31 @@ using StateSpaceLearning y = randn(100) -#Fit Model -output = StateSpaceLearning.fit_model(y) - -#Main output options -model_input = output.model_input # Model inputs that were utilized to build the regression matrix. -Create_X = output.Create_X # The function utilized to build the regression matrix. -X = output.X # High Dimension Regression utilized in the estimation. -coefs = output.coefs # High Dimension Regression coefficients estimated in the estimation. -ε = output.ε # Residuals of the model. -fitted = output.fitted # Fit in Sample of the model. -components = output.components # Dictionary containing information about each component of the model, each component has the keys: "Values" (The value of the component in each timestamp) , "Coefs" (The coefficients estimated for each element of the component) and "Indexes" (The indexes of the elements of the component in the high dimension regression "X"). -residuals_variances = output.residuals_variances # Dictionary containing the estimated variances for the innovations components (that is the information that can be utilized to initialize the state space model). 
-valid_indexes = output.valid_indexes # Vector containing valid indexes of the time series (non valid indexes represent NaN values in the time series).
-
-#Forecast
-prediction = StateSpaceLearning.forecast(output, 12) #Gets a 12 steps ahead prediction
+# Instantiate Model
+model = StructuralModel(y)
+
+# Fit Model
+fit!(model)
+
+# Point Forecast
+prediction = StateSpaceLearning.forecast(model, 12) # Gets a 12 steps ahead prediction
+
+# Scenario Paths Simulation
+simulation = StateSpaceLearning.simulate(model, 12, 1000) # Gets 1000 scenario paths of 12 steps ahead predictions
 ```

-## Fit Arguments
+## StructuralModel Arguments

-* `y::Vector{Fl}`: Vector of data.
-* `model_input::Dict`: Dictionary containing the model input parameters (default: Dict("level" => true, "stochastic\_level" => true, "trend" => true, "stochastic\_trend" => true, "seasonal" => true, "stochastic\_seasonal" => true, "freq\_seasonal" => 12, "outlier" => true, , "ζ\_ω\_threshold" => 12)).
-* `model_functions::Dict`: Dictionary containing the model functions (default: Dict("create\_X" => create\_X, "get\_components\_indexes" => get\_components\_indexes, "get\_variances" => get\_variances)).
-* `estimation_input::Dict`: Dictionary containing the estimation input parameters (default: Dict("α" => 0.1, "information\_criteria" => "aic", ψ => 0.05, "penalize\_exogenous" => true, "penalize\_initial\_states" => true)).
-* `estimation_function::Function`: Estimation function (default: default\_estimation\_procedure).
-* `Exogenous_X::Union{Matrix{Fl}, Missing}`: Exogenous variables matrix (default: missing).
+* `y::Vector`: Vector of data.
+* `level::Bool`: Boolean indicating whether to include an intercept (level) in the model (default: true)
+* `stochastic_level::Bool`: Boolean indicating whether to include a stochastic level component in the model (default: true)
+* `trend::Bool`: Boolean indicating whether to include a trend component in the model (default: true)
+* `stochastic_trend::Bool`: Boolean indicating whether to include a stochastic trend component in the model (default: true)
+* `seasonal::Bool`: Boolean indicating whether to include a seasonal component in the model (default: true)
+* `stochastic_seasonal::Bool`: Boolean indicating whether to include a stochastic seasonal component in the model (default: true)
+* `freq_seasonal::Int`: Seasonal frequency to be considered in the model (default: 12)
+* `outlier::Bool`: Boolean indicating whether to include an outlier component in the model (default: true)
+* `ζ_ω_threshold::Int`: Threshold used to stabilize the `stochastic trend` and `stochastic seasonal` components (default: 12)

 ## Features

@@ -62,18 +60,19 @@ airp = CSV.File(StateSpaceLearning.AIR_PASSENGERS) |> DataFrame
 log_air_passengers = log.(airp.passengers)
 steps_ahead = 30

-output = StateSpaceLearning.fit_model(log_air_passengers)
-prediction_log = StateSpaceLearning.forecast(output, steps_ahead) # arguments are the output of the fitted model and number of steps ahead the user wants to forecast
+model = StructuralModel(log_air_passengers)
+fit!(model)
+prediction_log = StateSpaceLearning.forecast(model, steps_ahead) # arguments are the fitted model and number of steps ahead the user wants to forecast
 prediction = exp.(prediction_log)

 plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
 plot!(vcat(ones(length(log_air_passengers)).*NaN, prediction), lab = "Forecast", w=2, color = "blue")
 ```
-![quick_example_airp](assets/quick_example_airp.PNG)
+![quick_example_airp](./docs/src/assets/quick_example_airp.PNG)

 ```julia
 N_scenarios = 1000
-simulation = StateSpaceLearning.simulate(output, steps_ahead, N_scenarios) # arguments are the output of the fitted model, 
number of steps ahead the user wants to forecast and number of scenario paths +simulation = StateSpaceLearning.simulate(model, steps_ahead, N_scenarios) # arguments are the output of the fitted model, number of steps ahead the user wants to forecast and number of scenario paths plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom) for s in 1:N_scenarios-1 @@ -82,7 +81,7 @@ end plot!(vcat(ones(length(log_air_passengers)).*NaN, exp.(simulation[:, N_scenarios])), lab = "Scenarios Paths", α = 0.1 , color = "red") ``` -![airp_sim](assets/airp_sim.svg) +![airp_sim](./docs/src/assets/airp_sim.svg) ### Component Extraction Quick example on how to perform component extraction in time series utilizing StateSpaceLearning. @@ -95,19 +94,22 @@ using Plots airp = CSV.File(StateSpaceLearning.AIR_PASSENGERS) |> DataFrame log_air_passengers = log.(airp.passengers) -output = StateSpaceLearning.fit_model(log_air_passengers) +model = StructuralModel(log_air_passengers) +fit!(model) -level = output.components["μ1"]["Values"] + output.components["ξ"]["Values"] -slope = output.components["ν1"]["Values"] + output.components["ζ"]["Values"] -seasonal = output.components["γ1"]["Values"] + output.components["ω"]["Values"] +level = model.output.components["μ1"]["Values"] + model.output.components["ξ"]["Values"] +slope = model.output.components["ν1"]["Values"] + model.output.components["ζ"]["Values"] +seasonal = model.output.components["γ1"]["Values"] + model.output.components["ω"]["Values"] trend = level + slope plot(trend, w=2 , color = "Black", lab = "Trend Component", legend = :outerbottom) plot(seasonal, w=2 , color = "Black", lab = "Seasonal Component", legend = :outerbottom) ``` -![quick_example_trend](assets/trend.svg) -![quick_example_seas](assets/seasonal.svg) + +| ![quick_example_trend](./docs/src/assets/trend.svg) | ![quick_example_seas](./docs/src/assets/seasonal.svg)| +|:------------------------------:|:-----------------------------:| + ### Best Subset Selection Quick example on how to perform best subset selection in time series utilizing StateSpaceLearning. @@ -127,9 +129,10 @@ X = rand(length(log_air_passengers), 10) # Create 10 exogenous features y = log_air_passengers + X[:, 1:3]*β # add to the log_air_passengers series a contribution from only 3 exogenous features. 
-output = StateSpaceLearning.fit_model(y; Exogenous_X = X, estimation_input = Dict("α" => 1.0, "information_criteria" => "bic", "ε" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true)) +model = StructuralModel(y; Exogenous_X = X) +fit!(model; α = 1.0, information_criteria = "bic", ϵ = 0.05, penalize_exogenous = true, penalize_initial_states = true) -Selected_exogenous = output.components["Exogenous_X"]["Selected"] +Selected_exogenous = model.output.components["Exogenous_X"]["Selected"] ``` @@ -149,9 +152,10 @@ log_air_passengers = log.(airp.passengers) airpassengers = Float64.(airp.passengers) log_air_passengers[60:72] .= NaN -output = StateSpaceLearning.fit_model(log_air_passengers) +model = StructuralModel(log_air_passengers) +fit!(model) -fitted_completed_missing_values = ones(144).*NaN; fitted_completed_missing_values[60:72] = exp.(output.fitted[60:72]) +fitted_completed_missing_values = ones(144).*NaN; fitted_completed_missing_values[60:72] = exp.(model.output.fitted[60:72]) real_removed_valued = ones(144).*NaN; real_removed_valued[60:72] = deepcopy(airp.passengers[60:72]) airpassengers[60:72] .= NaN @@ -160,7 +164,7 @@ plot!(real_removed_valued, lab = "Real Removed Values", w=2, color = "red") plot!(fitted_completed_missing_values, lab = "Fit in Sample completed values", w=2, color = "blue") ``` -![quick_example_completion_airp](assets/quick_example_completion_airp.PNG) +![quick_example_completion_airp](./docs/src/assets/quick_example_completion_airp.PNG) ### Outlier Detection Quick example of outlier detection for an altered air passengers time-series (artificial NaN values are added to the original time-series). @@ -177,14 +181,16 @@ log_air_passengers[60] = 10 log_air_passengers[30] = 1 log_air_passengers[100] = 2 -output = StateSpaceLearning.fit_model(log_air_passengers) -detected_outliers = findall(i -> i != 0, output.components["o"]["Coefs"]) +model = StructuralModel(log_air_passengers) +fit!(model) + +detected_outliers = findall(i -> i != 0, model.output.components["o"]["Coefs"]) plot(log_air_passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom) scatter!([detected_outliers], log_air_passengers[detected_outliers], lab = "Detected Outliers") ``` -![quick_example_completion_airp](assets/outlier.svg) +![quick_example_completion_airp](./docs/src/assets/outlier.svg) ### StateSpaceModels initialization Quick example on how to use StateSpaceLearning to initialize StateSpaceModels @@ -197,15 +203,17 @@ using StateSpaceModels airp = CSV.File(StateSpaceLearning.AIR_PASSENGERS) |> DataFrame log_air_passengers = log.(airp.passengers) -output = StateSpaceLearning.fit_model(log_air_passengers) -residuals_variances = output.residuals_variances +model = StructuralModel(log_air_passengers) +fit!(model) + +residuals_variances = model.output.residuals_variances -model = BasicStructural(log_air_passengers, 12) -set_initial_hyperparameters!(model, Dict("sigma2_ε" => residuals_variances["ε"], +ss_model = BasicStructural(log_air_passengers, 12) +set_initial_hyperparameters!(ss_model, Dict("sigma2_ε" => residuals_variances["ε"], "sigma2_ξ" =>residuals_variances["ξ"], "sigma2_ζ" =>residuals_variances["ζ"], "sigma2_ω" =>residuals_variances["ω"])) -fit!(model) +StateSpaceModels.fit!(ss_model) ``` ## Paper Results Reproducibility @@ -221,9 +229,9 @@ julia paper_tests/m4_test/m4_test.jl python paper_tests/m4_test/m4_test.py ``` -The results for SSL model in terms of MASE and sMAPE for all 48000 series will be stored in folder 
"paper\_tests/m4\_test/results\_SSL". The average results of MASE, sMAPE and OWA will be saved in file "paper\_tests/m4\_test/metric\_results/SSL\_METRICS\_RESULTS.csv". +The results for SSL model in terms of MASE and sMAPE for all 48000 series will be stored in folder "paper_tests/m4_test/results_SSL". The average results of MASE, sMAPE and OWA will be saved in file "paper_tests/m4_test/metric_results/SSL_METRICS_RESULTS.csv". -The results for SS model in terms of MASE and sMAPE for all 48000 series will be stored in folder "paper\_tests/m4\_test/results\_SS". The average results of MASE, sMAPE and OWA will be saved in file "paper\_tests/m4\_test/metric\_results/SS\_METRICS\_RESULTS.csv". +The results for SS model in terms of MASE and sMAPE for all 48000 series will be stored in folder "paper_tests/m4_test/results_SS". The average results of MASE, sMAPE and OWA will be saved in file "paper_tests/m4_test/metric_results/SS_METRICS_RESULTS.csv". ### Simulation Experiment @@ -239,7 +247,7 @@ As this test takes a long time, you may want to run it in parallel, for that you julia paper_tests/simulation_test/simulation.jl 3 ``` -The results will be saved in two separated files: "paper\_tests/simulation\_test/results\_metrics/metrics\_confusion\_matrix.csv" and "paper\_tests/simulation\_test/results\_metrics/metrics\_bias\_mse.csv" +The results will be saved in two separated files: "paper_tests/simulation_test/results_metrics/metrics_confusion_matrix.csv" and "paper_tests/simulation_test/results_metrics/metrics_bias_mse.csv" ## Contributing diff --git a/paper_tests/m4_test/evaluate_model.jl b/paper_tests/m4_test/evaluate_model.jl index ba0295a..d443bc1 100644 --- a/paper_tests/m4_test/evaluate_model.jl +++ b/paper_tests/m4_test/evaluate_model.jl @@ -1,28 +1,35 @@ -function evaluate_SSL(initialization_df::DataFrame, results_df::DataFrame, input::Dict, outlier::Bool, α::Float64, H::Int64, sample_size::Int64, information_criteria::String) - +function evaluate_SSL(initialization_df::DataFrame, results_df::DataFrame, input::Dict, + outlier::Bool, α::Float64, H::Int64, sample_size::Int64, + information_criteria::String) normalized_y = input["normalized_train"] - y_train = input["train"] - y_test = input["test"] - max_y = input["max"] - min_y = input["min"] + y_train = input["train"] + y_test = input["test"] + max_y = input["max"] + min_y = input["min"] - T= length(normalized_y) - normalized_y = normalized_y[max(1, T-sample_size+1):end] - output = StateSpaceLearning.fit_model(normalized_y; - model_input = Dict("level" => true, "stochastic_level" => true, "trend" => true, - "stochastic_trend" => true, - "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 12, - "outlier" => outlier, "ζ_ω_threshold" => 12), - estimation_input=Dict("α" => α, "information_criteria" => information_criteria, "ϵ" => 0.05, - "penalize_exogenous" => true, "penalize_initial_states" => true)) - normalized_prediction = StateSpaceLearning.forecast(output, H) + T = length(normalized_y) + normalized_y = normalized_y[max(1, T - sample_size + 1):end] + + model = StateSpaceLearning.StructuralModel(normalized_y; level=true, + stochastic_level=true, trend=true, + stochastic_trend=true, seasonal=true, + stochastic_seasonal=true, freq_seasonal=12, + outlier=outlier, ζ_ω_threshold=12) + StateSpaceLearning.fit!(model; α=α, information_criteria=information_criteria, ϵ=0.05, + penalize_exogenous=true, penalize_initial_states=true) + + normalized_prediction = StateSpaceLearning.forecast(model, H) prediction = 
de_normalize(normalized_prediction, max_y, min_y) - mase = MASE(y_train, y_test, prediction) + mase = MASE(y_train, y_test, prediction) smape = sMAPE(y_test, prediction) results_df = vcat(results_df, DataFrame([[mase], [smape]], [:MASE, :sMAPE])) - initialization_df = vcat(initialization_df, DataFrame([[output.residuals_variances["ξ"]], [output.residuals_variances["ω"]], [output.residuals_variances["ε"]], [output.residuals_variances["ζ"]]], [:ξ, :ω, :ϵ, :ζ])) + initialization_df = vcat(initialization_df, + DataFrame([[model.output.residuals_variances["ξ"]], + [model.output.residuals_variances["ω"]], + [model.output.residuals_variances["ε"]], + [model.output.residuals_variances["ζ"]]], + [:ξ, :ω, :ϵ, :ζ])) return initialization_df, results_df - -end \ No newline at end of file +end diff --git a/paper_tests/m4_test/m4_test.jl b/paper_tests/m4_test/m4_test.jl index cbcb7ad..53efca9 100644 --- a/paper_tests/m4_test/m4_test.jl +++ b/paper_tests/m4_test/m4_test.jl @@ -1,4 +1,4 @@ -import Pkg +using Pkg: Pkg Pkg.activate(".") Pkg.instantiate() @@ -10,7 +10,7 @@ df_train2 = CSV.read("paper_tests/m4_test/Monthly-train2.csv", DataFrame) df_train3 = CSV.read("paper_tests/m4_test/Monthly-train3.csv", DataFrame) df_train4 = CSV.read("paper_tests/m4_test/Monthly-train4.csv", DataFrame) df_train = vcat(df_train1, df_train2, df_train3, df_train4) # so that files are not too big and can be uploaded to github -df_test = CSV.read("paper_tests/m4_test/Monthly-test.csv", DataFrame) +df_test = CSV.read("paper_tests/m4_test/Monthly-test.csv", DataFrame) include("metrics.jl") include("evaluate_model.jl") @@ -26,11 +26,11 @@ function append_results(filepath, results_df) df_old = CSV.read(filepath, DataFrame) results_df = vcat(df_old, results_df) end - CSV.write(filepath, results_df) + return CSV.write(filepath, results_df) end -function run_config(results_table::DataFrame, outlier::Bool, information_criteria::String, α::Float64, save_init::Bool, sample_size::Int64) - +function run_config(results_table::DataFrame, outlier::Bool, information_criteria::String, + α::Float64, save_init::Bool, sample_size::Int64) NAIVE_sMAPE = 14.427 #M4 Paper NAIVE_MASE = 1.063 #M4 Paper @@ -47,7 +47,9 @@ function run_config(results_table::DataFrame, outlier::Bool, information_criteri initialization_df = DataFrame() end - initialization_df, results_df = evaluate_SSL(initialization_df, results_df, dict_vec[i], outlier, α, H, sample_size, information_criteria) + initialization_df, results_df = evaluate_SSL(initialization_df, results_df, + dict_vec[i], outlier, α, H, + sample_size, information_criteria) if i in [10000, 20000, 30000, 40000, 48000] !save_init ? append_results(filepath, results_df) : nothing @@ -57,11 +59,15 @@ function run_config(results_table::DataFrame, outlier::Bool, information_criteri results_df = CSV.read(filepath, DataFrame) - mase = trunc(mean(results_df[:, :MASE]), digits = 3) - smape = trunc(mean(results_df[:, :sMAPE]), digits = 3) - owa = trunc(mean([mean(results_df[:, :sMAPE])/NAIVE_sMAPE, mean(results_df[:, :MASE])/NAIVE_MASE]), digits = 3) - name = outlier ? "SSL-O ($(information_criteria), α = $(α))" : "SSL ($(information_criteria), α = $(α))" - results_table = vcat(results_table, DataFrame("Names" => ["$name"], "MASE" => [mase], "sMAPE" => [smape], "OWA" => [owa])) + mase = trunc(mean(results_df[:, :MASE]); digits=3) + smape = trunc(mean(results_df[:, :sMAPE]); digits=3) + owa = trunc(mean([mean(results_df[:, :sMAPE]) / NAIVE_sMAPE, + mean(results_df[:, :MASE]) / NAIVE_MASE]); digits=3) + name = outlier ? 
"SSL-O ($(information_criteria), α = $(α))" : + "SSL ($(information_criteria), α = $(α))" + results_table = vcat(results_table, + DataFrame("Names" => ["$name"], "MASE" => [mase], + "sMAPE" => [smape], "OWA" => [owa])) return results_table end @@ -72,12 +78,14 @@ function main() for information_criteria in ["aic", "bic"] for α in vcat([0.0], collect(0.1:0.2:0.9), [1.0]) @info "Running SSL with outlier = $outlier, information_criteria = $information_criteria, α = $α" - results_table = run_config(results_table, outlier, information_criteria, α, false, 60) + results_table = run_config(results_table, outlier, information_criteria, α, + false, 60) end end end - CSV.write("paper_tests/m4_test/metrics_results/SSL_METRICS_RESULTS.csv", results_table) -end + return CSV.write("paper_tests/m4_test/metrics_results/SSL_METRICS_RESULTS.csv", + results_table) +end function create_dirs() try @@ -106,4 +114,4 @@ create_dirs() main() -run_config(DataFrame(), false, "aic", 0.1, true, 2794)#max sample size \ No newline at end of file +run_config(DataFrame(), false, "aic", 0.1, true, 2794)#max sample size diff --git a/paper_tests/m4_test/metrics.jl b/paper_tests/m4_test/metrics.jl index fb5515c..ed5f8ae 100644 --- a/paper_tests/m4_test/metrics.jl +++ b/paper_tests/m4_test/metrics.jl @@ -2,18 +2,18 @@ function sMAPE(y_test::Vector, prediction::Vector) H = length(y_test) denominator = abs.(y_test) + abs.(prediction) - return (200/H)*sum(abs(y_test[i] - prediction[i])/(denominator[i]) for i in 1:H) + return (200 / H) * sum(abs(y_test[i] - prediction[i]) / (denominator[i]) for i in 1:H) end function MASE(y_train::Vector, y_test::Vector, prediction::Vector; m::Int64=12) T = length(y_train) H = length(y_test) - numerator = (1/H)*sum(abs(y_test[i] - prediction[i]) for i in 1:H) - denominator = (1/(T-m))*sum(abs(y_train[j]-y_train[j-m]) for j in m+1:T) - return numerator/denominator + numerator = (1 / H) * sum(abs(y_test[i] - prediction[i]) for i in 1:H) + denominator = (1 / (T - m)) * sum(abs(y_train[j] - y_train[j - m]) for j in (m + 1):T) + return numerator / denominator end function OWA(MASE1, MASE2, sMAPE1, sMAPE2) - return 0.5*(((MASE1)/(MASE2))+((sMAPE1)/(sMAPE2))) -end \ No newline at end of file + return 0.5 * (((MASE1) / (MASE2)) + ((sMAPE1) / (sMAPE2))) +end diff --git a/paper_tests/m4_test/prepare_data.jl b/paper_tests/m4_test/prepare_data.jl index 8d9bdbc..2dd3303 100644 --- a/paper_tests/m4_test/prepare_data.jl +++ b/paper_tests/m4_test/prepare_data.jl @@ -11,10 +11,10 @@ function build_train_test_dict(df_train::DataFrame, df_test::DataFrame) for i in eachindex(df_train[:, 1]) key = df_train[:, 1][i] y_raw = Vector(df_train[i, :])[2:end] - y_train_raw = y_raw[1:findlast(i->!ismissing(i), y_raw)] + y_train_raw = y_raw[1:findlast(i -> !ismissing(i), y_raw)] T = length(y_train_raw) y_train = y_train_raw - y_test = Vector(df_test[i, :])[2:end] + y_test = Vector(df_test[i, :])[2:end] y_max = maximum(y_train) y_min = minimum(y_train) @@ -24,9 +24,9 @@ function build_train_test_dict(df_train::DataFrame, df_test::DataFrame) train_test_dict[key] = Dict() train_test_dict[key]["normalized_train"] = y_train_normalized train_test_dict[key]["train"] = y_train - train_test_dict[key]["test"] = y_test - train_test_dict[key]["max"] = y_max - train_test_dict[key]["min"] = y_min + train_test_dict[key]["test"] = y_test + train_test_dict[key]["max"] = y_max + train_test_dict[key]["min"] = y_min end dict_vec = [] @@ -36,4 +36,4 @@ function build_train_test_dict(df_train::DataFrame, df_test::DataFrame) push!(dict_vec, 
train_test_dict[key]) end return dict_vec -end \ No newline at end of file +end diff --git a/paper_tests/simulation_test/evaluate_models.jl b/paper_tests/simulation_test/evaluate_models.jl index 42e4f74..df1d063 100644 --- a/paper_tests/simulation_test/evaluate_models.jl +++ b/paper_tests/simulation_test/evaluate_models.jl @@ -7,31 +7,39 @@ function get_confusion_matrix(selected, true_features, false_features) return true_positives, false_positives, false_negatives, true_negatives end -function get_SSL_results(y_train::Vector{Float64}, true_features::Vector{Int64}, false_features::Vector{Int64}, X_train::Matrix{Float64}, inf_criteria::String, true_β::Vector{Float64}) - series_result=nothing - - t = @elapsed output = StateSpaceLearning.fit_model(y_train; Exogenous_X=X_train, - model_input = Dict("level" => true, "stochastic_level" => true, "trend" => true, - "stochastic_trend" => true, - "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 12, - "outlier" => false, "ζ_ω_threshold" => 12), - estimation_input=Dict("α" => 1.0, "information_criteria" => inf_criteria, "ϵ" => 0.05, - "penalize_exogenous" => true, "penalize_initial_states" => true)) +function get_SSL_results(y_train::Vector{Float64}, true_features::Vector{Int64}, + false_features::Vector{Int64}, X_train::Matrix{Float64}, + inf_criteria::String, true_β::Vector{Float64}) + series_result = nothing - selected = output.components["Exogenous_X"]["Selected"] - true_positives, false_positives, false_negatives, true_negatives = get_confusion_matrix(selected, true_features, false_features) + model = StateSpaceLearning.StructuralModel(y_train; level=true, stochastic_level=true, + trend=true, stochastic_trend=true, + seasonal=true, stochastic_seasonal=true, + freq_seasonal=12, outlier=false, + ζ_ω_threshold=12, Exogenous_X=X_train) + t = @elapsed StateSpaceLearning.fit!(model; α=1.0, information_criteria=inf_criteria, + ϵ=0.05, penalize_exogenous=true, + penalize_initial_states=true) - mse = mse_func(output.components["Exogenous_X"]["Coefs"], true_β) - bias = bias_func(output.components["Exogenous_X"]["Coefs"], true_β) + selected = model.output.components["Exogenous_X"]["Selected"] + true_positives, false_positives, false_negatives, true_negatives = get_confusion_matrix(selected, + true_features, + false_features) - series_result = DataFrame([[t], [mse], [bias], [true_positives], [false_positives], [false_negatives], [true_negatives]], [:time, :mse, :bias, :true_positives, :false_positives, :false_negatives, :true_negatives]) + mse = mse_func(model.output.components["Exogenous_X"]["Coefs"], true_β) + bias = bias_func(model.output.components["Exogenous_X"]["Coefs"], true_β) + + series_result = DataFrame([[t], [mse], [bias], [true_positives], [false_positives], + [false_negatives], [true_negatives]], + [:time, :mse, :bias, :true_positives, :false_positives, + :false_negatives, :true_negatives]) return series_result end - -function get_SS_res_results(y_train::Vector{Float64}, true_features::Vector{Int64}, false_features::Vector{Int64}, X_train::Matrix{Float64}, inf_criteria::String, true_β::Vector{Float64}) - +function get_SS_res_results(y_train::Vector{Float64}, true_features::Vector{Int64}, + false_features::Vector{Int64}, X_train::Matrix{Float64}, + inf_criteria::String, true_β::Vector{Float64}) py""" import math import statsmodels.api as sm @@ -49,20 +57,31 @@ function get_SS_res_results(y_train::Vector{Float64}, true_features::Vector{Int6 t = @elapsed begin res, converged = py"evaluate_ss"(y_train) - lasso_path = 
glmnet(X_train, res, alpha = 1.0, intercept=false) - coefs1, _ = StateSpaceLearning.get_path_information_criteria(lasso_path, X_train, res, inf_criteria;intercept=false) - penalty_factor = 1 ./ (abs.(coefs1)); - lasso_path2 = glmnet(X_train, res, alpha = 1.0, penalty_factor=penalty_factor, intercept=false) - lasso_coefs, _ = StateSpaceLearning.get_path_information_criteria(lasso_path2, X_train, res, inf_criteria;intercept=false) + lasso_path = glmnet(X_train, res; alpha=1.0, intercept=false) + coefs1, _ = StateSpaceLearning.get_path_information_criteria(lasso_path, X_train, + res, inf_criteria; + intercept=false) + penalty_factor = 1 ./ (abs.(coefs1)) + lasso_path2 = glmnet(X_train, res; alpha=1.0, penalty_factor=penalty_factor, + intercept=false) + lasso_coefs, _ = StateSpaceLearning.get_path_information_criteria(lasso_path2, + X_train, res, + inf_criteria; + intercept=false) end selected = findall(i -> i != 0, lasso_coefs) - true_positives, false_positives, false_negatives, true_negatives = get_confusion_matrix(selected, true_features, false_features) - + true_positives, false_positives, false_negatives, true_negatives = get_confusion_matrix(selected, + true_features, + false_features) + mse = mse_func(lasso_coefs, true_β) bias = bias_func(lasso_coefs, true_β) - - series_result = DataFrame([[t], [mse], [bias], [true_positives], [false_positives], [false_negatives], [true_negatives]], [:time, :mse, :bias, :true_positives, :false_positives, :false_negatives, :true_negatives]) + + series_result = DataFrame([[t], [mse], [bias], [true_positives], [false_positives], + [false_negatives], [true_negatives]], + [:time, :mse, :bias, :true_positives, :false_positives, + :false_negatives, :true_negatives]) return series_result, converged end @@ -85,8 +104,9 @@ function get_exogenous_ss_inf_criteria(y_train::Vector{Float64}, X_train::Matrix return py"evaluate_ss"(y_train, X_train) end -function get_forward_ss(y_train::Vector{Float64}, true_features::Vector{Int64}, false_features::Vector{Int64}, X_train::Matrix{Float64}, inf_criteria::String, true_β::Vector{Float64}) - +function get_forward_ss(y_train::Vector{Float64}, true_features::Vector{Int64}, + false_features::Vector{Int64}, X_train::Matrix{Float64}, + inf_criteria::String, true_β::Vector{Float64}) best_inf_crit = Inf current_inf_crit = 0 coefs = nothing @@ -95,15 +115,19 @@ function get_forward_ss(y_train::Vector{Float64}, true_features::Vector{Int64}, stop = false last_converged = nothing converged = nothing - t = @elapsed begin + t = @elapsed begin while !stop iteration_inf_vec = [] iteration_coefs_vec = [] for i in remaining_exogs - aic, bic, coefs_exog, converged = get_exogenous_ss_inf_criteria(y_train, X_train[:, sort(vcat(selected, i))]) + aic, bic, coefs_exog, converged = get_exogenous_ss_inf_criteria(y_train, + X_train[:, + sort(vcat(selected, + i))]) inf_crit = inf_criteria == "aic" ? 
aic : bic push!(iteration_inf_vec, inf_crit) - push!(iteration_coefs_vec, coefs_exog[end - length(vcat(selected, i))+1:end]) + push!(iteration_coefs_vec, + coefs_exog[(end - length(vcat(selected, i)) + 1):end]) end current_inf_crit = minimum(iteration_inf_vec) if current_inf_crit < best_inf_crit @@ -121,12 +145,17 @@ function get_forward_ss(y_train::Vector{Float64}, true_features::Vector{Int64}, estimated_coefs = zeros(size(X_train, 2)) estimated_coefs[sort(selected)] = coefs - true_positives, false_positives, false_negatives, true_negatives = get_confusion_matrix(selected, true_features, false_features) - + true_positives, false_positives, false_negatives, true_negatives = get_confusion_matrix(selected, + true_features, + false_features) + mse = mse_func(estimated_coefs, true_β) bias = bias_func(estimated_coefs, true_β) - - series_result = DataFrame([[t], [mse], [bias], [true_positives], [false_positives], [false_negatives], [true_negatives]], [:time, :mse, :bias, :true_positives, :false_positives, :false_negatives, :true_negatives]) + + series_result = DataFrame([[t], [mse], [bias], [true_positives], [false_positives], + [false_negatives], [true_negatives]], + [:time, :mse, :bias, :true_positives, :false_positives, + :false_negatives, :true_negatives]) return series_result, last_converged -end \ No newline at end of file +end diff --git a/paper_tests/simulation_test/metrics.jl b/paper_tests/simulation_test/metrics.jl index c03061e..6f943b2 100644 --- a/paper_tests/simulation_test/metrics.jl +++ b/paper_tests/simulation_test/metrics.jl @@ -1,7 +1,7 @@ function mse_func(prediction::Vector, test::Vector) - return mean((prediction - test).^2) + return mean((prediction - test) .^ 2) end function bias_func(prediction::Vector, test::Vector) return mean((prediction - test)) -end \ No newline at end of file +end diff --git a/paper_tests/simulation_test/simulation.jl b/paper_tests/simulation_test/simulation.jl index ae4f074..4e50f20 100644 --- a/paper_tests/simulation_test/simulation.jl +++ b/paper_tests/simulation_test/simulation.jl @@ -1,4 +1,4 @@ -import Pkg +using Pkg: Pkg Pkg.activate(".") Pkg.instantiate() using StateSpaceLearning @@ -13,19 +13,21 @@ include("evaluate_models.jl") include("simulation_generator.jl") using Distributed -import Pkg +using Pkg: Pkg Pkg.activate(".") Pkg.instantiate() using StateSpaceLearning -ARGS[1] > 1 ? addprocs(ARGS[1]) : nothing +nprocs = parse(Int, ARGS[1]) +nprocs > 1 ? 
addprocs(nprocs) : nothing @everywhere begin - import Pkg - using CSV, DataFrames, StateSpaceModels, Statistics, PyCall, Distributions, GLMNet, Polynomials + using Pkg: Pkg + using CSV, DataFrames, StateSpaceModels, Statistics, PyCall, Distributions, GLMNet, + Polynomials end @everywhere begin - import Pkg + using Pkg: Pkg Pkg.activate(".") using StateSpaceLearning end @@ -64,41 +66,61 @@ create_dirs() rr6 = DataFrame() for i in 1:50 y_featured, true_exps, X, true_β = generate_subset(60, M, K) - y_featured = M_K_d["p_vec_M_K"][i]["y_featured"] - true_exps = M_K_d["p_vec_M_K"][i]["true_exps"] + y_featured = M_K_d["p_vec_M_K"][i]["y_featured"] + true_exps = M_K_d["p_vec_M_K"][i]["true_exps"] X = M_K_d["p_vec_M_K"][i]["X"] - true_β = M_K_d["p_vec_M_K"][i]["true_β"] + true_β = M_K_d["p_vec_M_K"][i]["true_β"] @info(i) - series_result1 = get_SSL_results(y_featured, collect(1:K), setdiff(collect(1:M), collect(1:K)), X, "aic", true_β) - series_result2 = get_SSL_results(y_featured, collect(1:K), setdiff(collect(1:M), collect(1:K)), X, "bic", true_β) - series_result3, converged3 = get_forward_ss(y_featured, collect(1:K), setdiff(collect(1:M), collect(1:K)), X, "aic", true_β) - series_result4, converged4 = get_forward_ss(y_featured, collect(1:K), setdiff(collect(1:M), collect(1:K)), X, "bic", true_β) - series_result5, converged5 = get_SS_res_results(y_featured, collect(1:K), setdiff(collect(1:M), collect(1:K)), X, "aic", true_β) - series_result6, converged6 = get_SS_res_results(y_featured, collect(1:K), setdiff(collect(1:M), collect(1:K)), X, "bic", true_β) + series_result1 = get_SSL_results(y_featured, collect(1:K), + setdiff(collect(1:M), collect(1:K)), X, "aic", + true_β) + series_result2 = get_SSL_results(y_featured, collect(1:K), + setdiff(collect(1:M), collect(1:K)), X, "bic", + true_β) + series_result3, converged3 = get_forward_ss(y_featured, collect(1:K), + setdiff(collect(1:M), collect(1:K)), + X, "aic", true_β) + series_result4, converged4 = get_forward_ss(y_featured, collect(1:K), + setdiff(collect(1:M), collect(1:K)), + X, "bic", true_β) + series_result5, converged5 = get_SS_res_results(y_featured, collect(1:K), + setdiff(collect(1:M), + collect(1:K)), X, "aic", + true_β) + series_result6, converged6 = get_SS_res_results(y_featured, collect(1:K), + setdiff(collect(1:M), + collect(1:K)), X, "bic", + true_β) rr1 = vcat(rr1, series_result1) rr2 = vcat(rr2, series_result2) rr3 = vcat(rr3, series_result3) rr4 = vcat(rr4, series_result4) rr5 = vcat(rr5, series_result5) rr6 = vcat(rr6, series_result6) - i+=1 + i += 1 @info("K: $K, M: $M") @info(mean(rr2[!, "true_positives"])) @info(mean(rr2[!, "false_positives"])) end - CSV.write("paper_tests/simulation_test/results_simulation_raw/SSL_aic_$(M)$(K)_$N.csv", rr1) - CSV.write("paper_tests/simulation_test/results_simulation_raw/SSL_bic_$(M)$(K)_$N.csv", rr2) - CSV.write("paper_tests/simulation_test/results_simulation_raw/SS_aic_f$(M)$(K)_$N.csv", rr3) - CSV.write("paper_tests/simulation_test/results_simulation_raw/SS_bic_f$(M)$(K)_$N.csv", rr4) - CSV.write("paper_tests/simulation_test/results_simulation_raw/SS_aic_$(M)$(K)_$N.csv", rr5) - CSV.write("paper_tests/simulation_test/results_simulation_raw/SS_bic_$(M)$(K)_$N.csv", rr6) + CSV.write("paper_tests/simulation_test/results_simulation_raw/SSL_aic_$(M)$(K)_$N.csv", + rr1) + CSV.write("paper_tests/simulation_test/results_simulation_raw/SSL_bic_$(M)$(K)_$N.csv", + rr2) + CSV.write("paper_tests/simulation_test/results_simulation_raw/SS_aic_f$(M)$(K)_$N.csv", + rr3) + 
CSV.write("paper_tests/simulation_test/results_simulation_raw/SS_bic_f$(M)$(K)_$N.csv", + rr4) + CSV.write("paper_tests/simulation_test/results_simulation_raw/SS_aic_$(M)$(K)_$N.csv", + rr5) + return CSV.write("paper_tests/simulation_test/results_simulation_raw/SS_bic_$(M)$(K)_$N.csv", + rr6) end end Random.seed!(2024) p_vec = [] for M in [50, 100] - for K in [3, 5, 8, 10] + for K in [3, 5, 8, 10] for N in 1:10 p_vec_M_K = Dict() for i in 1:50 @@ -115,13 +137,10 @@ for M in [50, 100] end function robust_pmap(f::Function, a; num_retries::Int=3) - - return pmap(f, a; on_error = e-> rethrow(), - retry_delays=ExponentialBackOff(n=num_retries) - ) + return pmap(f, a; on_error=e -> rethrow(), + retry_delays=ExponentialBackOff(; n=num_retries)) end - robust_pmap(get_M_K_res, p_vec) for M in [50, 100] @@ -133,12 +152,18 @@ for M in [50, 100] last_df5 = DataFrame() last_df6 = DataFrame() for i in 1:10 - df1 = CSV.read("paper_tests/simulation_test/results_simulation_raw/SSL_aic_$(M)$(K)_$i.csv", DataFrame) - df2 = CSV.read("paper_tests/simulation_test/results_simulation_raw/SSL_bic_$(M)$(K)_$i.csv", DataFrame) - df3 = CSV.read("paper_tests/simulation_test/results_simulation_raw/SS_aic_$(M)$(K)_$i.csv", DataFrame) - df4 = CSV.read("paper_tests/simulation_test/results_simulation_raw/SS_bic_$(M)$(K)_$i.csv", DataFrame) - df5 = CSV.read("paper_tests/simulation_test/results_simulation_raw/SS_aic_f$(M)$(K)_$i.csv", DataFrame) - df6 = CSV.read("paper_tests/simulation_test/results_simulation_raw/SS_bic_f$(M)$(K)_$i.csv", DataFrame) + df1 = CSV.read("paper_tests/simulation_test/results_simulation_raw/SSL_aic_$(M)$(K)_$i.csv", + DataFrame) + df2 = CSV.read("paper_tests/simulation_test/results_simulation_raw/SSL_bic_$(M)$(K)_$i.csv", + DataFrame) + df3 = CSV.read("paper_tests/simulation_test/results_simulation_raw/SS_aic_$(M)$(K)_$i.csv", + DataFrame) + df4 = CSV.read("paper_tests/simulation_test/results_simulation_raw/SS_bic_$(M)$(K)_$i.csv", + DataFrame) + df5 = CSV.read("paper_tests/simulation_test/results_simulation_raw/SS_aic_f$(M)$(K)_$i.csv", + DataFrame) + df6 = CSV.read("paper_tests/simulation_test/results_simulation_raw/SS_bic_f$(M)$(K)_$i.csv", + DataFrame) last_df1 = vcat(last_df1, df1) last_df2 = vcat(last_df2, df2) last_df3 = vcat(last_df3, df3) @@ -146,12 +171,18 @@ for M in [50, 100] last_df5 = vcat(last_df5, df5) last_df6 = vcat(last_df6, df6) end - CSV.write("paper_tests/simulation_test/results_simulation/SSL_aic_$(M)$(K).csv", last_df1) - CSV.write("paper_tests/simulation_test/results_simulation/SSL_bic_$(M)$(K).csv", last_df2) - CSV.write("paper_tests/simulation_test/results_simulation/SS_aic_$(M)$(K).csv", last_df3) - CSV.write("paper_tests/simulation_test/results_simulation/SS_bic_$(M)$(K).csv", last_df4) - CSV.write("paper_tests/simulation_test/results_simulation/SS_aic_f$(M)$(K).csv", last_df5) - CSV.write("paper_tests/simulation_test/results_simulation/SS_bic_f$(M)$(K).csv", last_df6) + CSV.write("paper_tests/simulation_test/results_simulation/SSL_aic_$(M)$(K).csv", + last_df1) + CSV.write("paper_tests/simulation_test/results_simulation/SSL_bic_$(M)$(K).csv", + last_df2) + CSV.write("paper_tests/simulation_test/results_simulation/SS_aic_$(M)$(K).csv", + last_df3) + CSV.write("paper_tests/simulation_test/results_simulation/SS_bic_$(M)$(K).csv", + last_df4) + CSV.write("paper_tests/simulation_test/results_simulation/SS_aic_f$(M)$(K).csv", + last_df5) + CSV.write("paper_tests/simulation_test/results_simulation/SS_bic_f$(M)$(K).csv", + last_df6) end end @@ -165,18 +196,20 @@ function 
get_metrics(df, q, n) false_positive_rate = 0 for i in 1:T i_df = df[i, :] - if i_df["true_positives"] == q && i_df["true_negatives"] == n - q && i_df["false_positives"] == 0 && i_df["false_negatives"] == 0 - true_model_rate += 1/T + if i_df["true_positives"] == q && i_df["true_negatives"] == n - q && + i_df["false_positives"] == 0 && i_df["false_negatives"] == 0 + true_model_rate += 1 / T end - if i_df["true_positives"] == q - all_true_positives_rate += 1/T + if i_df["true_positives"] == q + all_true_positives_rate += 1 / T end - true_positive_rate += i_df["true_positives"]/(T*q) - true_negative_rate += i_df["true_negatives"]/(T*(n-q)) - positive_rate += (i_df["true_positives"] + i_df["false_positives"])/T - false_positive_rate += (i_df["false_positives"])/T + true_positive_rate += i_df["true_positives"] / (T * q) + true_negative_rate += i_df["true_negatives"] / (T * (n - q)) + positive_rate += (i_df["true_positives"] + i_df["false_positives"]) / T + false_positive_rate += (i_df["false_positives"]) / T end - return true_model_rate, all_true_positives_rate, true_positive_rate, true_negative_rate, positive_rate, false_positive_rate + return true_model_rate, all_true_positives_rate, true_positive_rate, true_negative_rate, + positive_rate, false_positive_rate end df_dict = Dict() @@ -187,15 +220,20 @@ df_dict["SS_f_bic"] = Dict() df_dict["SSL_aic"] = Dict() df_dict["SSL_bic"] = Dict() - for n in [50, 100] for q in [3, 5, 8, 10] - df_SS_aic = CSV.read("paper_tests/simulation_test/results_simulation/SS_aic_$n$(q).csv", DataFrame) - df_SS_bic = CSV.read("paper_tests/simulation_test/results_simulation/SS_bic_$n$(q).csv", DataFrame) - df_SS_aicf = CSV.read("paper_tests/simulation_test/results_simulation/SS_aic_f$n$(q).csv", DataFrame) - df_SS_bicf = CSV.read("paper_tests/simulation_test/results_simulation/SS_bic_f$n$(q).csv", DataFrame) - df_SSL_aic = CSV.read("paper_tests/simulation_test/results_simulation/SSL_aic_$n$(q).csv", DataFrame) - df_SSL_bic = CSV.read("paper_tests/simulation_test/results_simulation/SSL_bic_$n$(q).csv", DataFrame) + df_SS_aic = CSV.read("paper_tests/simulation_test/results_simulation/SS_aic_$n$(q).csv", + DataFrame) + df_SS_bic = CSV.read("paper_tests/simulation_test/results_simulation/SS_bic_$n$(q).csv", + DataFrame) + df_SS_aicf = CSV.read("paper_tests/simulation_test/results_simulation/SS_aic_f$n$(q).csv", + DataFrame) + df_SS_bicf = CSV.read("paper_tests/simulation_test/results_simulation/SS_bic_f$n$(q).csv", + DataFrame) + df_SSL_aic = CSV.read("paper_tests/simulation_test/results_simulation/SSL_aic_$n$(q).csv", + DataFrame) + df_SSL_bic = CSV.read("paper_tests/simulation_test/results_simulation/SSL_bic_$n$(q).csv", + DataFrame) df_dict["SS_aic"][n, q] = get_metrics(df_SS_aic, q, n) df_dict["SS_bic"][n, q] = get_metrics(df_SS_bic, q, n) df_dict["SS_f_aic"][n, q] = get_metrics(df_SS_aicf, q, n) @@ -212,11 +250,11 @@ for name in ["SSL_aic", "SSL_bic", "SS_aic", "SS_bic", "SS_f_aic", "SS_f_bic"] column = [] for i in 1:6 for q in [3, 5, 8, 10] - push!(column, round(df_dict[name][n, q][i], digits = 3)) + push!(column, round(df_dict[name][n, q][i]; digits=3)) end end df[!, Symbol(name * "_" * string(n))] = column - end + end end CSV.write("paper_tests/simulation_test/results_metrics/metrics_confusion_matrix.csv", df) @@ -226,17 +264,17 @@ df_mse_bias = DataFrame() function convert_to_sci_notation(num::Float64) # Get the exponent part of the number in scientific notation exp_part = floor(log10(abs(num))) - + # Divide the number by 10^(exp_part) to get the mantissa mantissa 
= num / 10^(exp_part) - + # Round the mantissa to have one decimal place - rounded_mantissa = round(mantissa, digits=1) - + rounded_mantissa = round(mantissa; digits=1) + # Construct the string representation of the result in scientific notation result_str = string(rounded_mantissa, "e", exp_part) - - return result_str[1:end-2] + + return result_str[1:(end - 2)] end for name in ["SSL_aic_", "SSL_bic_", "SS_aic_", "SS_bic_", "SS_aic_f", "SS_bic_f"] @@ -244,23 +282,24 @@ for name in ["SSL_aic_", "SSL_bic_", "SS_aic_", "SS_bic_", "SS_aic_f", "SS_bic_f column = [] for i in 1:5 for q in [3, 5, 8, 10] - df_name = CSV.read("paper_tests/simulation_test/results_simulation/$(name)$n$(q).csv", DataFrame) + df_name = CSV.read("paper_tests/simulation_test/results_simulation/$(name)$n$(q).csv", + DataFrame) if i == 1 - num = round(mean(df_name[:, "mse"]), digits = 3) + num = round(mean(df_name[:, "mse"]); digits=3) if num > 10 push!(column, convert_to_sci_notation(num)) else push!(column, num) end elseif i == 2 - num = round(mean(df_name[:, "bias"]), digits = 3) + num = round(mean(df_name[:, "bias"]); digits=3) if num > 10 push!(column, convert_to_sci_notation(num)) else push!(column, num) end elseif i == 3 - num = round(mean(df_name[:, "time"]), digits = 3) + num = round(mean(df_name[:, "time"]); digits=3) if num > 10 push!(column, convert_to_sci_notation(num)) else @@ -272,8 +311,8 @@ for name in ["SSL_aic_", "SSL_bic_", "SS_aic_", "SS_bic_", "SS_aic_f", "SS_bic_f q75 = quantile(mse_vec, 0.75) IQR = q75 - q25 #exclude out of iqr values - mse_vec = mse_vec[(q25 - 1.5*IQR .<= mse_vec) .& (mse_vec .<= q75 + 1.5*IQR)] - num = round(mean(mse_vec), digits = 3) + mse_vec = mse_vec[(q25 - 1.5 * IQR .<= mse_vec) .& (mse_vec .<= q75 + 1.5 * IQR)] + num = round(mean(mse_vec); digits=3) if num > 10 push!(column, convert_to_sci_notation(num)) else @@ -285,8 +324,8 @@ for name in ["SSL_aic_", "SSL_bic_", "SS_aic_", "SS_bic_", "SS_aic_f", "SS_bic_f q75 = quantile(bias_vec, 0.75) IQR = q75 - q25 #exclude out of iqr values - bias_vec = bias_vec[(q25 - 1.5*IQR .<= bias_vec) .& (bias_vec .<= q75 + 1.5*IQR)] - num = round(mean(bias_vec), digits = 3) + bias_vec = bias_vec[(q25 - 1.5 * IQR .<= bias_vec) .& (bias_vec .<= q75 + 1.5 * IQR)] + num = round(mean(bias_vec); digits=3) if num > 10 push!(column, convert_to_sci_notation(num)) else @@ -296,6 +335,7 @@ for name in ["SSL_aic_", "SSL_bic_", "SS_aic_", "SS_bic_", "SS_aic_f", "SS_bic_f end end df_mse_bias[!, Symbol(name * "_" * string(n))] = column - end + end end -CSV.write("paper_tests/simulation_test/results_simulation/metrics_bias_mse.csv", df_mse_bias) \ No newline at end of file +CSV.write("paper_tests/simulation_test/results_simulation/metrics_bias_mse.csv", + df_mse_bias) diff --git a/paper_tests/simulation_test/simulation_generator.jl b/paper_tests/simulation_test/simulation_generator.jl index ec72ab4..b3273e8 100644 --- a/paper_tests/simulation_test/simulation_generator.jl +++ b/paper_tests/simulation_test/simulation_generator.jl @@ -1,41 +1,48 @@ -using Random, Distributions, Statistics +using Random, Distributions, Statistics using Polynomials -function ar_polinomial(p::Vector{Fl}) where Fl +function ar_polinomial(p::Vector{Fl}) where {Fl} return Polynomial([one(Fl); -p]) end -function ma_polinomial(q::Vector{Fl}) where Fl +function ma_polinomial(q::Vector{Fl}) where {Fl} return Polynomial([one(Fl); q]) end function roots_of_inverse_polinomial(poly::Polynomial) - return roots(poly).^-1 + return roots(poly) .^ -1 end -function assert_stationarity(p::Vector{Fl}) 
where Fl +function assert_stationarity(p::Vector{Fl}) where {Fl} poly = ar_polinomial(p) return all(abs.(roots_of_inverse_polinomial(poly)) .< 1) end -function assert_invertibility(q::Vector{Fl}) where Fl +function assert_invertibility(q::Vector{Fl}) where {Fl} poly = ma_polinomial(q) return all(abs.(roots_of_inverse_polinomial(poly)) .< 1) end function generate_sarima_exog(T::Int64, M::Int64) - X = zeros(T, M) s = 12 for j in 1:M - try_again = true - ar_params = nothing; ma_params = nothing; sar_params = nothing; sma_params = nothing - p = nothing; q = nothing; P = nothing; Q = nothing + ar_params = nothing + ma_params = nothing + sar_params = nothing + sma_params = nothing + p = nothing + q = nothing + P = nothing + Q = nothing while try_again - p=rand([0, 1,2,3]);q=rand([0, 1,2,3]);P=rand([0,1]);Q=rand([0,1]) - + p = rand([0, 1, 2, 3]) + q = rand([0, 1, 2, 3]) + P = rand([0, 1]) + Q = rand([0, 1]) + ar_params = rand(Normal(0, 0.2), p) ma_params = rand(Normal(0, 0.2), q) sar_params = rand(Normal(0, 0.2), P) @@ -48,19 +55,21 @@ function generate_sarima_exog(T::Int64, M::Int64) try_again = false end end - - data = vcat(rand(Normal(0, 1), max(p, q, P * s, Q * s)), zeros(T - max(p, q, P * s, Q * s))) - for i = max(p, q, P * s, Q * s) + 1:T + + data = vcat(rand(Normal(0, 1), max(p, q, P * s, Q * s)), + zeros(T - max(p, q, P * s, Q * s))) + for i in (max(p, q, P * s, Q * s) + 1):T seasonal_comp = 0 if i > s - P_term = P == 0 ? 0 : sum(sar_params .* data[(i - s * P):(i - s):(i - s * P + 1)]) - Q_term = Q == 0 ? 0 : sum(sma_params .* data[(i - s * Q):(i - s):(i - s * Q + 1)]) + P_term = P == 0 ? 0 : + sum(sar_params .* data[(i - s * P):(i - s):(i - s * P + 1)]) + Q_term = Q == 0 ? 0 : + sum(sma_params .* data[(i - s * Q):(i - s):(i - s * Q + 1)]) seasonal_comp = P_term + Q_term end p_term = sum(ar_params .* data[(i - p):(i - 1)]) q_term = sum(ma_params .* data[(i - q):(i - 1)]) data[i] += p_term + q_term + seasonal_comp - end # Add Gaussian noise @@ -73,8 +82,8 @@ end function generate_subset(T::Int64, M::Int64, K::Int64) s = 12 - - μ1 = 1. + + μ1 = 1.0 ν1 = 0.001 γ1 = [1.5, 2.6, 3.0, 2.6, 1.5, 0.0, -1.5, -2.6, -3.0, -2.6, -1.5] @@ -84,20 +93,20 @@ function generate_subset(T::Int64, M::Int64, K::Int64) γ = [γ1...] 
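+    # The loop below evolves a basic structural model:
+    #   μ_t = μ_{t-1} + ν_{t-1} + η_t,  η_t ~ N(0, stds[1])   (local level)
+    #   ν_t = ν_{t-1} + ζ_t,            ζ_t ~ N(0, stds[2])   (stochastic slope)
+    #   γ_t = -(γ_{t-1} + ... + γ_{t-s+1}) + stds[3]          (seasonal; note that
+    #   the constant stds[3] enters directly rather than as a N(0, stds[3]) draw)
+    # and assembles y_t = μ_t + γ_t before the exogenous effects and measurement
+    # noise are added below.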
y = [μ1 + γ1[1]] - for t in 2:T - push!(ν, ν[t-1] + rand(Normal(0, stds[2]))) - push!(μ, μ[t-1] + ν[t-1] + rand(Normal(0, stds[1]))) + for t in 2:T + push!(ν, ν[t - 1] + rand(Normal(0, stds[2]))) + push!(μ, μ[t - 1] + ν[t - 1] + rand(Normal(0, stds[1]))) if t > 11 - push!(γ, - sum(γ[t - j] for j in 1:s-1) + stds[3]) + push!(γ, -sum(γ[t - j] for j in 1:(s - 1)) + stds[3]) end push!(y, μ[t] + γ[t]) end - X = generate_sarima_exog(T, M) + X = generate_sarima_exog(T, M) true_exps = collect(1:K) - β = ones(length(true_exps))#rand(Normal(0, 1.0), length(true_exps)) - X_exp = X[:, true_exps]*β + β = ones(length(true_exps))#rand(Normal(0, 1.0), length(true_exps)) + X_exp = X[:, true_exps] * β y = y + X_exp - y += rand(Normal(0, 0.2), T) + y += rand(Normal(0, 0.2), T) return y, true_exps, X, vcat(β, zeros(M - K)) -end \ No newline at end of file +end diff --git a/src/StateSpaceLearning.jl b/src/StateSpaceLearning.jl index 0a28ac1..2f7069b 100644 --- a/src/StateSpaceLearning.jl +++ b/src/StateSpaceLearning.jl @@ -2,173 +2,16 @@ module StateSpaceLearning using LinearAlgebra, Statistics, GLMNet, Distributions +abstract type StateSpaceLearningModel end + include("structs.jl") -include("models/default_model.jl") +include("models/structural_model.jl") include("information_criteria.jl") -include("estimation_procedure/default_estimation_procedure.jl") +include("estimation_procedure.jl") include("utils.jl") include("datasets.jl") +include("fit_forecast.jl") -const DEFAULT_COMPONENTS_PARAMETERS = ["level", "stochastic_level", "trend", "stochastic_trend", "seasonal", "stochastic_seasonal", "freq_seasonal"] - -export fit_model, forecast, simulate, create_X, get_variances - -""" -fit_model(y::Vector{Fl}; - model_input::Dict = Dict("level" => true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, - "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 12, - "outlier" => true, "ζ_ω_threshold" => 12), - model_functions::Dict = Dict("create_X" => create_X, "get_components_indexes" => get_components_indexes, - "get_variances" => get_variances), - estimation_input::Dict = Dict("α" => 0.1, "information_criteria" => "aic", "ϵ" => 0.05, - "penalize_exogenous" => true, "penalize_initial_states" => true), - estimation_function::Function = default_estimation_procedure, - Exogenous_X::Matrix{Fl} = zeros(length(y), 0))::Output where Fl - - Fits the StateSpaceLearning model using specified parameters and estimation procedures. - - # Arguments - - `y::Vector{Fl}`: Vector of data. - - model_input::Dict: Dictionary containing the model input parameters (default: Dict("level" => true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 12, "outlier" => true, , "ζ_ω_threshold" => 12)). - - model_functions::Dict: Dictionary containing the model functions (default: Dict("create_X" => create_X, "get_components_indexes" => get_components_indexes, "get_variances" => get_variances)). - - estimation_input::Dict: Dictionary containing the estimation input parameters (default: Dict("α" => 0.1, "information_criteria" => "aic", ϵ => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true)). - - `estimation_function::Function`: Estimation function (default: default_estimation_procedure). - - `Exogenous_X::Union{Matrix{Fl}, Missing}`: Exogenous variables matrix (default: missing). - - # Returns - - `Output`: Output object containing model information, coefficients, residuals, etc. 
- -""" -function fit_model(y::Vector{Fl}; - model_input::Dict = Dict("level" => true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, - "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 12, - "outlier" => true, "ζ_ω_threshold" => 12), - model_functions::Dict = Dict("create_X" => create_X, "get_components_indexes" => get_components_indexes, - "get_variances" => get_variances), - estimation_input::Dict = Dict("α" => 0.1, "information_criteria" => "aic", "ϵ" => 0.05, - "penalize_exogenous" => true, "penalize_initial_states" => true), - estimation_function::Function = default_estimation_procedure, - Exogenous_X::Matrix{Fl} = zeros(length(y), 0))::Output where Fl - - T = length(y) - - if model_functions["create_X"] == create_X - @assert T > model_input["freq_seasonal"] "Time series must be longer than the seasonal period" - @assert all([key in keys(model_input) for key in DEFAULT_COMPONENTS_PARAMETERS]) "The default components model must have all the necessary parameters $(DEFAULT_COMPONENTS_PARAMETERS)" - end - - @assert !has_intercept(Exogenous_X) "Exogenous matrix must not have an intercept column" - - X = model_functions["create_X"](model_input, Exogenous_X) - - if has_intercept(X) - @assert allequal(X[:, 1]) "Intercept column must be the first column" - @assert !has_intercept(X[:, 2:end]) "Matrix must not have more than one intercept column" - end - - estimation_y, Estimation_X, valid_indexes = handle_missing_values(X, y) - - components_indexes = model_functions["get_components_indexes"](Exogenous_X, model_input) - - coefs, estimation_ε = estimation_function(Estimation_X, estimation_y, components_indexes, estimation_input) - - components = build_components(Estimation_X, coefs, components_indexes) - - residuals_variances = model_functions["get_variances"](estimation_ε, coefs, components_indexes) - - ε, fitted = get_fit_and_residuals(estimation_ε, coefs, X, valid_indexes, T) - - return Output(model_input, model_functions["create_X"], X, coefs, ε, fitted, components, residuals_variances, valid_indexes) -end - -""" - forecast(output::Output, steps_ahead::Int; Exogenous_Forecast::Union{Matrix{Fl}, Missing}=missing)::Vector{Float64} where Fl - - Returns the forecast for a given number of steps ahead using the provided StateSpaceLearning output and exogenous forecast data. - - # Arguments - - `output::Output`: Output object obtained from model fitting. - - `steps_ahead::Int`: Number of steps ahead for forecasting. - - `Exogenous_Forecast::Matrix{Fl}`: Exogenous variables forecast (default: zeros(steps_ahead, 0)) - - # Returns - - `Vector{Float64}`: Vector containing forecasted values. - -""" -function forecast(output::Output, steps_ahead::Int; Exogenous_Forecast::Matrix{Fl}=zeros(steps_ahead, 0))::Vector{Float64} where Fl - - @assert length(output.components["Exogenous_X"]["Indexes"]) == size(Exogenous_Forecast, 2) "If an exogenous matrix was utilized in the estimation procedure, it must be provided its prediction for the forecast procedure. 
If no exogenous matrix was utilized, Exogenous_Forecast must be missing" - @assert size(Exogenous_Forecast, 1) == steps_ahead "Exogenous_Forecast must have the same number of rows as steps_ahead" - - Exogenous_X = output.X[:, output.components["Exogenous_X"]["Indexes"]] - complete_matrix = output.Create_X(output.model_input, Exogenous_X, steps_ahead, Exogenous_Forecast) - - return complete_matrix[end-steps_ahead+1:end, :]*output.coefs -end - -""" -simulate(output::Output, steps_ahead::Int; N_scenarios::Int = 1000, simulate_outliers::Bool = true, Exogenous_Forecast::Matrix{Fl}=zeros(steps_ahead, 0))::Matrix{Float64} where Fl - - Generate simulations for a given number of steps ahead using the provided StateSpaceLearning output and exogenous forecast data. - - # Arguments - - `output::Output`: Output object obtained from model fitting. - - `steps_ahead::Int`: Number of steps ahead for simulation. - - `N_scenarios::Int`: Number of scenarios to simulate (default: 1000). - - `simulate_outliers::Bool`: If true, simulate outliers (default: true). - - `Exogenous_Forecast::Matrix{Fl}`: Exogenous variables forecast (default: zeros(steps_ahead, 0)) - - # Returns - - `Matrix{Float64}`: Matrix containing simulated values. -""" -function simulate(output::Output, steps_ahead::Int, N_scenarios::Int; simulate_outliers::Bool = true, - innovation_functions::Dict = Dict("stochastic_level" => Dict("create_X" => create_ξ, "component" => "ξ", "args" => (length(output.ε) + steps_ahead + 1, 0)), - "stochastic_trend" => Dict("create_X" => create_ζ, "component" => "ζ", "args" => (length(output.ε) + steps_ahead + 1, 0, 1)), - "stochastic_seasonal" => Dict("create_X" => create_ω, "component" => "ω", "args" => (length(output.ε) + steps_ahead + 1, output.model_input["freq_seasonal"], 0, 1))), - Exogenous_Forecast::Matrix{Fl}=zeros(steps_ahead, 0))::Matrix{Float64} where Fl - - prediction = forecast(output, steps_ahead; Exogenous_Forecast = Exogenous_Forecast) - - T = length(output.ε) - simulation_X = zeros(steps_ahead, 0) - components_matrix = zeros(length(output.valid_indexes), 0) - N_components = 1 - - for innovation in keys(innovation_functions) - if output.model_input[innovation] - innov_dict = innovation_functions[innovation] - simulation_X = hcat(simulation_X, innov_dict["create_X"](innov_dict["args"]...)[end-steps_ahead:end-1, end-steps_ahead+1:end]) - comp = fill_innovation_coefs(T, innov_dict["component"], output) - components_matrix = hcat(components_matrix, comp[output.valid_indexes]) - N_components += 1 - end - end - - components_matrix = hcat(components_matrix, output.ε[output.valid_indexes]) - simulation_X = hcat(simulation_X, Matrix(1.0 * I, steps_ahead, steps_ahead)) - components_matrix += rand(Normal(0, 1), size(components_matrix)) ./ 1e9 # Make sure matrix is positive definite - - ∑ = cov(components_matrix) - MV_dist = MvNormal(zeros(N_components), ∑) - o_noises = simulate_outliers && output.model_input["outlier"] ? rand(Normal(0, std(output.components["o"]["Coefs"])), steps_ahead, N_scenarios) : zeros(steps_ahead, N_scenarios) - - simulation = hcat([prediction for _ in 1:N_scenarios]...) 
- for s in 1:N_scenarios - sim_coefs = ones(size(simulation_X, 2)) .* NaN - - for i in 1:steps_ahead - rand_inovs = rand(MV_dist) - - for comp in eachindex(rand_inovs) - sim_coefs[i + (comp - 1) * steps_ahead] = rand_inovs[comp] - end - end - - simulation[:, s] += (simulation_X * sim_coefs + o_noises[:, s]) - end - - return simulation - -end +export fit!, forecast, simulate, StructuralModel end # module StateSpaceLearning diff --git a/src/datasets.jl b/src/datasets.jl index 32808f7..0e65aa5 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -9,4 +9,4 @@ See more on [Airline passengers](@ref) # References * https://www.stata-press.com/data/r12/ts.html """ -const AIR_PASSENGERS = joinpath(dirname(@__DIR__()), "datasets", "airpassengers.csv") \ No newline at end of file +const AIR_PASSENGERS = joinpath(dirname(@__DIR__()), "datasets", "airpassengers.csv") diff --git a/src/estimation_procedure/default_estimation_procedure.jl b/src/estimation_procedure.jl similarity index 58% rename from src/estimation_procedure/default_estimation_procedure.jl rename to src/estimation_procedure.jl index c098710..82684ac 100644 --- a/src/estimation_procedure/default_estimation_procedure.jl +++ b/src/estimation_procedure.jl @@ -10,8 +10,7 @@ - `Vector{Int}`: Vector containing the indexes of columns with dummy variables. """ -function get_dummy_indexes(Exogenous_X::Matrix{Fl}) where{Fl} - +function get_dummy_indexes(Exogenous_X::Matrix{Fl}) where {Fl} T, p = size(Exogenous_X) dummy_indexes = [] @@ -37,7 +36,8 @@ end - `Vector{Int}`: Vector containing the indexes of outlier columns that are duplicates of dummy variables in the exogenous matrix. """ -function get_outlier_duplicate_columns(Estimation_X::Matrix{Tl}, components_indexes::Dict{String, Vector{Int}}) where{Tl} +function get_outlier_duplicate_columns(Estimation_X::Matrix{Tl}, + components_indexes::Dict{String,Vector{Int}}) where {Tl} if !haskey(components_indexes, "o") return [] else @@ -48,7 +48,6 @@ function get_outlier_duplicate_columns(Estimation_X::Matrix{Tl}, components_inde return o_indexes[dummy_indexes] .- 1 end - end """ @@ -68,23 +67,29 @@ end - `Tuple{Vector{Float64}, Vector{Float64}}`: Tuple containing coefficients and residuals of the best model. """ -function get_path_information_criteria(model::GLMNetPath, Lasso_X::Matrix{Tl}, Lasso_y::Vector{Fl}, information_criteria::String; intercept::Bool = true)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} +function get_path_information_criteria(model::GLMNetPath, Lasso_X::Matrix{Tl}, + Lasso_y::Vector{Fl}, information_criteria::String; + intercept::Bool=true)::Tuple{Vector{Float64}, + Vector{Float64}} where {Tl, + Fl} path_size = length(model.lambda) - T = size(Lasso_X, 1) - K = count(i->i != 0, model.betas; dims = 1)' + T = size(Lasso_X, 1) + K = count(i -> i != 0, model.betas; dims=1)' method_vec = Vector{Float64}(undef, path_size) for i in 1:path_size - fit = Lasso_X*model.betas[:, i] .+ model.a0[i] - ε = Lasso_y - fit - - method_vec[i] = get_information(T, K[i], ε; information_criteria = information_criteria) + fit = Lasso_X * model.betas[:, i] .+ model.a0[i] + ε = Lasso_y - fit + + method_vec[i] = get_information(T, K[i], ε; + information_criteria=information_criteria) end best_model_idx = argmin(method_vec) - coefs = intercept ? vcat(model.a0[best_model_idx], model.betas[:, best_model_idx]) : model.betas[:, best_model_idx] - fit = intercept ? hcat(ones(T), Lasso_X)*coefs : Lasso_X*coefs - ε = Lasso_y - fit + coefs = intercept ? 
vcat(model.a0[best_model_idx], model.betas[:, best_model_idx]) : + model.betas[:, best_model_idx] + fit = intercept ? hcat(ones(T), Lasso_X) * coefs : Lasso_X * coefs + ε = Lasso_y - fit return coefs, ε end @@ -108,9 +113,15 @@ end - `Tuple{Vector{Float64}, Vector{Float64}}`: Tuple containing coefficients and residuals of the best model. """ -function fit_glmnet(Lasso_X::Matrix{Tl}, Lasso_y::Vector{Fl}, α::Float64; information_criteria::String = "aic", penalty_factor::Vector{Float64}=ones(size(Lasso_X,2) - 1), intercept::Bool = intercept)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} - model = glmnet(Lasso_X, Lasso_y, alpha = α, penalty_factor = penalty_factor, intercept = intercept, dfmax=size(Lasso_X, 2), lambda_min_ratio=0.001) - return get_path_information_criteria(model, Lasso_X, Lasso_y, information_criteria; intercept = intercept) +function fit_glmnet(Lasso_X::Matrix{Tl}, Lasso_y::Vector{Fl}, α::Float64; + information_criteria::String="aic", + penalty_factor::Vector{Float64}=ones(size(Lasso_X, 2) - 1), + intercept::Bool=intercept)::Tuple{Vector{Float64}, + Vector{Float64}} where {Tl,Fl} + model = glmnet(Lasso_X, Lasso_y; alpha=α, penalty_factor=penalty_factor, + intercept=intercept, dfmax=size(Lasso_X, 2), lambda_min_ratio=0.001) + return get_path_information_criteria(model, Lasso_X, Lasso_y, information_criteria; + intercept=intercept) end """ @@ -134,42 +145,54 @@ end - `Tuple{Vector{Float64}, Vector{Float64}}`: Tuple containing coefficients and residuals of the fitted Lasso model. """ -function fit_lasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64, information_criteria::String, penalize_exogenous::Bool, components_indexes::Dict{String, Vector{Int}}, penalty_factor::Vector{Float64}; rm_average::Bool = false)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} - - outlier_duplicate_columns = get_outlier_duplicate_columns(Estimation_X, components_indexes) +function fit_lasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64, + information_criteria::String, penalize_exogenous::Bool, + components_indexes::Dict{String,Vector{Int}}, + penalty_factor::Vector{Float64}; + rm_average::Bool=false)::Tuple{Vector{Float64}, + Vector{Float64}} where {Tl,Fl} + outlier_duplicate_columns = get_outlier_duplicate_columns(Estimation_X, + components_indexes) penalty_factor[outlier_duplicate_columns] .= Inf hasintercept = has_intercept(Estimation_X) if hasintercept - !penalize_exogenous ? penalty_factor[components_indexes["Exogenous_X"] .- 1] .= 0 : nothing + !penalize_exogenous ? penalty_factor[components_indexes["Exogenous_X"] .- 1] .= 0 : + nothing Lasso_X = Estimation_X[:, 2:end] else - !penalize_exogenous ? penalty_factor[components_indexes["Exogenous_X"]] .= 0 : nothing + !penalize_exogenous ? 
penalty_factor[components_indexes["Exogenous_X"]] .= 0 :
+        nothing
         Lasso_X = Estimation_X
         @assert !rm_average "Intercept must be included in the model if rm_average is set to true"
     end

     if rm_average
-        mean_y = mean(estimation_y)
+        mean_y = mean(estimation_y)
         Lasso_y = estimation_y .- mean_y
     else
         Lasso_y = estimation_y
     end

     if hasintercept
-        coefs, ε = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = !rm_average)
+        coefs, ε = fit_glmnet(Lasso_X, Lasso_y, α;
+                              information_criteria=information_criteria,
+                              penalty_factor=penalty_factor, intercept=!rm_average)
     else
-        coefs, ε = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = false)
+        coefs, ε = fit_glmnet(Lasso_X, Lasso_y, α;
+                              information_criteria=information_criteria,
+                              penalty_factor=penalty_factor, intercept=false)
     end

     return rm_average ? (vcat(mean_y, coefs), ε) : (coefs, ε)
-
 end

 """
-    fit_adalasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64,
-                    information_criteria::String,
-                    components_indexes::Dict{String, Vector{Int}},
-                    ε::Float64, penalize_exogenous::Bool)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}
+    estimation_procedure(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl},
+                         components_indexes::Dict{String, Vector{Int}}, α::Float64,
+                         information_criteria::String,
+                         ϵ::Float64,
+                         penalize_exogenous::Bool,
+                         penalize_initial_states::Bool)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}

     Fits an Adaptive Lasso (AdaLasso) regression model to the provided data and returns coefficients and residuals.

     # Arguments
     - `Estimation_X::Matrix{Tl}`: Matrix of predictors for estimation.
     - `estimation_y::Vector{Fl}`: Vector of response values for estimation.
     - `components_indexes::Dict{String, Vector{Int}}`: Dictionary containing indexes for different components.
-    - `estimation_input::Dict`: Dictionary containing the estimation input parameters.
+    - `α::Float64`: Elastic net mixing factor between ridge (α=0) and lasso (α=1) (default: 0.1).
+    - `information_criteria::String`: Information criterion used for hyperparameter selection (default: aic).
+    - `ϵ::Float64`: Non-negative value used to handle zero coefficients from the first lasso step (default: 0.05).
+    - `penalize_exogenous::Bool`: Flag for selecting exogenous variables. When false, the penalty factor for these variables is set to 0.
+    - `penalize_initial_states::Bool`: Flag for selecting initial states. When false, the penalty factor for these variables is set to 0.

     # Returns
     - `Tuple{Vector{Float64}, Vector{Float64}}`: Tuple containing coefficients and residuals of the fitted AdaLasso model.
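+
+    # Example
+    A self-contained sketch of the two-step AdaLasso idea on synthetic data
+    (plain GLMNet calls, not this function's internal call sequence):
+
+        using GLMNet, Statistics
+
+        X, y = randn(100, 10), randn(100)
+        ϵ = 0.05
+        first_pass = glmnet(X, y; alpha=0.1)
+        β = first_pass.betas[:, end]     # coefficients from the first lasso pass
+        weights = 1 ./ (abs.(β) .+ ϵ)    # adaptive penalty factors
+        second_pass = glmnet(X, y; alpha=0.1, penalty_factor=weights)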
""" -function default_estimation_procedure(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, - components_indexes::Dict{String, Vector{Int}}, estimation_input::Dict)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} - - @assert all([key in keys(estimation_input) for key in ["α", "information_criteria", "ϵ", "penalize_exogenous", "penalize_initial_states"]]) "All estimation input parameters must be set" - α = estimation_input["α"]; information_criteria = estimation_input["information_criteria"]; - ϵ = estimation_input["ϵ"]; penalize_exogenous = estimation_input["penalize_exogenous"]; - penalize_initial_states = estimation_input["penalize_initial_states"] - +function estimation_procedure(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, + components_indexes::Dict{String,Vector{Int}}, α::Float64, + information_criteria::String, + ϵ::Float64, + penalize_exogenous::Bool, + penalize_initial_states::Bool)::Tuple{Vector{Float64}, + Vector{Float64}} where {Tl, + Fl} @assert 0 <= α <= 1 "α must be in [0, 1]" + @assert ϵ > 0 "ϵ must be positive" hasintercept = has_intercept(Estimation_X) if hasintercept penalty_factor = ones(size(Estimation_X, 2) - 1) penalty_factor[components_indexes["initial_states"][2:end] .- 1] .= 0 - coefs, _ = fit_lasso(Estimation_X, estimation_y, α, information_criteria, penalize_exogenous, components_indexes, penalty_factor; rm_average = true) + coefs, _ = fit_lasso(Estimation_X, estimation_y, α, information_criteria, + penalize_exogenous, components_indexes, penalty_factor; + rm_average=true) else penalty_factor = ones(size(Estimation_X, 2)) penalty_factor[components_indexes["initial_states"][2:end]] .= 0 - coefs, _ = fit_lasso(Estimation_X, estimation_y, α, information_criteria, penalize_exogenous, components_indexes, penalty_factor; rm_average = false) + coefs, _ = fit_lasso(Estimation_X, estimation_y, α, information_criteria, + penalize_exogenous, components_indexes, penalty_factor; + rm_average=false) end #AdaLasso per component - ts_penalty_factor = hasintercept ? zeros(size(Estimation_X, 2) - 1) : zeros(size(Estimation_X, 2)) + ts_penalty_factor = hasintercept ? zeros(size(Estimation_X, 2) - 1) : + zeros(size(Estimation_X, 2)) for key in keys(components_indexes) if key != "initial_states" && key != "μ1" component = components_indexes[key] if key != "Exogenous_X" && key != "o" && !(key in ["ν1", "γ1"]) κ = count(i -> i != 0, coefs[component]) < 1 ? 0 : std(coefs[component]) - hasintercept ? ts_penalty_factor[component .- 1] .= (1 / (κ + ϵ)) : ts_penalty_factor[component] .= (1 / (κ + ϵ)) + hasintercept ? ts_penalty_factor[component .- 1] .= (1 / (κ + ϵ)) : + ts_penalty_factor[component] .= (1 / (κ + ϵ)) else - hasintercept ? ts_penalty_factor[component .- 1] = (1 ./ (abs.(coefs[component]) .+ ϵ)) : ts_penalty_factor[component] = (1 ./ (abs.(coefs[component]) .+ ϵ)) + hasintercept ? + ts_penalty_factor[component .- 1] = (1 ./ (abs.(coefs[component]) .+ ϵ)) : + ts_penalty_factor[component] = (1 ./ (abs.(coefs[component]) .+ ϵ)) end end - end + end if hasintercept - !penalize_initial_states ? ts_penalty_factor[components_indexes["initial_states"][2:end] .- 1] .= 0 : nothing + !penalize_initial_states ? + ts_penalty_factor[components_indexes["initial_states"][2:end] .- 1] .= 0 : nothing else - !penalize_initial_states ? ts_penalty_factor[components_indexes["initial_states"][2:end]] .= 0 : nothing + !penalize_initial_states ? 
+        ts_penalty_factor[components_indexes["initial_states"][2:end]] .= 0 : nothing
     end

-    return fit_lasso(Estimation_X, estimation_y, α, information_criteria, penalize_exogenous, components_indexes, ts_penalty_factor; rm_average = false)
+    return fit_lasso(Estimation_X, estimation_y, α, information_criteria,
+                     penalize_exogenous, components_indexes, ts_penalty_factor;
+                     rm_average=false)
 end
diff --git a/src/fit_forecast.jl b/src/fit_forecast.jl
new file mode 100644
index 0000000..1134e99
--- /dev/null
+++ b/src/fit_forecast.jl
@@ -0,0 +1,144 @@
+
+"""
+function fit!(model::StateSpaceLearningModel;
+    α::Float64 = 0.1,
+    information_criteria::String = "aic",
+    ϵ::Float64 = 0.05,
+    penalize_exogenous::Bool = true,
+    penalize_initial_states::Bool = true)
+
+    Fits the StateSpaceLearning model using the specified parameters and estimation procedure.
+
+    # Arguments
+    model::StateSpaceLearningModel: Model to be fitted.
+    α::Float64: Elastic net mixing parameter (default: 0.1).
+    information_criteria::String: Method for hyperparameter selection (default: "aic").
+    ϵ::Float64: Non-negative value used to handle zero coefficients from the first lasso step (default: 0.05).
+    penalize_exogenous::Bool: If true, penalize exogenous variables (default: true).
+    penalize_initial_states::Bool: If true, penalize initial states (default: true).
+"""
+function fit!(model::StateSpaceLearningModel;
+              α::Float64=0.1,
+              information_criteria::String="aic",
+              ϵ::Float64=0.05,
+              penalize_exogenous::Bool=true,
+              penalize_initial_states::Bool=true)
+    if has_intercept(model.X)
+        @assert allequal(model.X[:, 1]) "Intercept column must be the first column"
+        @assert !has_intercept(model.X[:, 2:end]) "Matrix must not have more than one intercept column"
+    end
+
+    estimation_y, Estimation_X, valid_indexes = handle_missing_values(model.X, model.y)
+
+    components_indexes = get_components_indexes(model)
+
+    coefs, estimation_ε = estimation_procedure(Estimation_X, estimation_y,
+                                               components_indexes, α, information_criteria,
+                                               ϵ, penalize_exogenous,
+                                               penalize_initial_states)
+
+    components = build_components(Estimation_X, coefs, components_indexes)
+
+    residuals_variances = get_variances(model, estimation_ε, coefs, components_indexes)
+
+    ε, fitted = get_fit_and_residuals(estimation_ε, coefs, model.X, valid_indexes,
+                                      length(model.y))
+
+    output = Output(coefs, ε, fitted, residuals_variances, valid_indexes, components)
+    return model.output = output
+end
+
+"""
+    forecast(model::StateSpaceLearningModel, steps_ahead::Int; Exogenous_Forecast::Matrix{Fl}=zeros(steps_ahead, 0))::Vector{Float64} where Fl
+
+    Returns the forecast for a given number of steps ahead using the provided fitted StateSpaceLearning model and exogenous forecast data.
+
+    # Arguments
+    - `model::StateSpaceLearningModel`: Model obtained from fitting.
+    - `steps_ahead::Int`: Number of steps ahead for forecasting.
+    - `Exogenous_Forecast::Matrix{Fl}`: Exogenous variables forecast (default: zeros(steps_ahead, 0))
+
+    # Returns
+    - `Vector{Float64}`: Vector containing forecasted values.
+
+"""
+function forecast(model::StateSpaceLearningModel, steps_ahead::Int;
+                  Exogenous_Forecast::Matrix{Fl}=zeros(steps_ahead, 0))::Vector{Float64} where {Fl}
+    @assert length(model.output.components["Exogenous_X"]["Indexes"]) ==
+            size(Exogenous_Forecast, 2) "If an exogenous matrix was utilized in the estimation procedure, its forecast must be provided for the forecast procedure.
If no exogenous matrix was utilized, Exogenous_Forecast must be kept at its default (an empty matrix)"
+    @assert size(Exogenous_Forecast, 1) == steps_ahead "Exogenous_Forecast must have the same number of rows as steps_ahead"
+
+    Exogenous_X = model.X[:, model.output.components["Exogenous_X"]["Indexes"]]
+    complete_matrix = create_X(model.level, model.stochastic_level, model.trend,
+                               model.stochastic_trend,
+                               model.seasonal, model.stochastic_seasonal,
+                               model.freq_seasonal,
+                               model.outlier, model.ζ_ω_threshold, Exogenous_X,
+                               steps_ahead, Exogenous_Forecast)

+    return complete_matrix[(end - steps_ahead + 1):end, :] * model.output.coefs
+end
+
+"""
+simulate(model::StateSpaceLearningModel, steps_ahead::Int, N_scenarios::Int;
+         Exogenous_Forecast::Matrix{Fl}=zeros(steps_ahead, 0))::Matrix{Float64} where Fl
+
+    Generates simulations for a given number of steps ahead using the provided fitted StateSpaceLearning model and exogenous forecast data.
+
+    # Arguments
+    - `model::StateSpaceLearningModel`: Model obtained from fitting.
+    - `steps_ahead::Int`: Number of steps ahead for simulation.
+    - `N_scenarios::Int`: Number of scenario paths to simulate.
+    - `Exogenous_Forecast::Matrix{Fl}`: Exogenous variables forecast (default: zeros(steps_ahead, 0))
+
+    # Returns
+    - `Matrix{Float64}`: Matrix containing simulated values.
+"""
+function simulate(model::StateSpaceLearningModel, steps_ahead::Int, N_scenarios::Int;
+                  Exogenous_Forecast::Matrix{Fl}=zeros(steps_ahead, 0))::Matrix{Float64} where {Fl}
+    prediction = forecast(model, steps_ahead; Exogenous_Forecast=Exogenous_Forecast)
+
+    simulation_X = zeros(steps_ahead, 0)
+    components_matrix = zeros(length(model.output.valid_indexes), 0)
+    N_components = 1
+
+    model_innovations = get_model_innovations(model)
+    for innovation in model_innovations
+        if innovation in keys(model.output.residuals_variances)
+            simulation_X = hcat(simulation_X,
+                                get_innovation_simulation_X(model, innovation, steps_ahead)[(end - steps_ahead):(end - 1),
+                                                                                            (end - steps_ahead + 1):end])
+            comp = fill_innovation_coefs(model, innovation)
+            components_matrix = hcat(components_matrix, comp[model.output.valid_indexes])
+            N_components += 1
+        end
+    end
+
+    components_matrix = hcat(components_matrix, model.output.ε[model.output.valid_indexes])
+    simulation_X = hcat(simulation_X, Matrix(1.0 * I, steps_ahead, steps_ahead))
+    components_matrix += rand(Normal(0, 1), size(components_matrix)) ./ 1e9 # Make sure matrix is positive definite
+
+    ∑ = cov(components_matrix)
+    MV_dist = MvNormal(zeros(N_components), ∑)
+    o_noises = model.outlier ?
+               rand(Normal(0, std(model.output.components["o"]["Coefs"])), steps_ahead,
+                    N_scenarios) : zeros(steps_ahead, N_scenarios)
+
+    simulation = hcat([prediction for _ in 1:N_scenarios]...)
+    for s in 1:N_scenarios
+        sim_coefs = ones(size(simulation_X, 2)) .* NaN
+
+        for i in 1:steps_ahead
+            rand_inovs = rand(MV_dist)
+
+            for comp in eachindex(rand_inovs)
+                sim_coefs[i + (comp - 1) * steps_ahead] = rand_inovs[comp]
+            end
+        end
+
+        simulation[:, s] += (simulation_X * sim_coefs + o_noises[:, s])
+    end
+
+    return simulation
+end
diff --git a/src/information_criteria.jl b/src/information_criteria.jl
index fb1e2a9..0040a67 100644
--- a/src/information_criteria.jl
+++ b/src/information_criteria.jl
@@ -14,12 +14,13 @@

     - `Float64`: Information criterion value.
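+
+    # Example
+    A quick numeric check of the three formulas implemented below
+    (illustrative values):
+
+        using Statistics
+
+        T, K = 100, 5
+        ε = randn(T)
+        aic = 2 * K + T * log(var(ε))
+        bic = T * log(var(ε)) + K * log(T)
+        aicc = aic + (2 * K^2 + 2 * K) / (T - K - 1)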
""" -function get_information(T::Int, K::Int, ε::Vector{Float64}; information_criteria::String = "aic")::Float64 +function get_information(T::Int, K::Int, ε::Vector{Float64}; + information_criteria::String="aic")::Float64 if information_criteria == "bic" - return T*log(var(ε)) + K*log(T) + return T * log(var(ε)) + K * log(T) elseif information_criteria == "aic" - return 2*K + T*log(var(ε)) + return 2 * K + T * log(var(ε)) elseif information_criteria == "aicc" - return 2*K + T*log(var(ε)) + ((2*K^2 +2*K)/(T - K - 1)) + return 2 * K + T * log(var(ε)) + ((2 * K^2 + 2 * K) / (T - K - 1)) end -end \ No newline at end of file +end diff --git a/src/models/default_model.jl b/src/models/default_model.jl deleted file mode 100644 index 9e4e747..0000000 --- a/src/models/default_model.jl +++ /dev/null @@ -1,283 +0,0 @@ -""" - ξ_size(T::Int)::Int - - Calculates the size of ξ innovation matrix based on the input T. - - # Arguments - - `T::Int`: Length of the original time series. - - # Returns - - `Int`: Size of ξ calculated from T. - -""" -ξ_size(T::Int)::Int = T - 2 - -""" -ζ_size(T::Int, ζ_ω_threshold::Int)::Int - - Calculates the size of ζ innovation matrix based on the input T. - - # Arguments - - `T::Int`: Length of the original time series. - - `ζ_ω_threshold::Int`: Stabilize parameter ζ. - - # Returns - - `Int`: Size of ζ calculated from T. - -""" -ζ_size(T::Int, ζ_ω_threshold::Int)::Int = T-ζ_ω_threshold-2 - -""" -ω_size(T::Int, s::Int)::Int - - Calculates the size of ω innovation matrix based on the input T. - - # Arguments - - `T::Int`: Length of the original time series. - - `s::Int`: Seasonal period. - - # Returns - - `Int`: Size of ω calculated from T. - -""" -ω_size(T::Int, s::Int, ζ_ω_threshold::Int)::Int = T - ζ_ω_threshold - s + 1 - -""" - create_ξ(T::Int, steps_ahead::Int)::Matrix - - Creates a matrix of innovations ξ based on the input sizes, and the desired steps ahead (this is necessary for the forecast function) - - # Arguments - - `T::Int`: Length of the original time series. - - `steps_ahead::Int`: Number of steps ahead (for estimation purposes this should be set at 0). - - # Returns - - `Matrix`: Matrix of innovations ξ constructed based on the input sizes. - -""" -function create_ξ(T::Int, steps_ahead::Int)::Matrix - ξ_matrix = Matrix{Float64}(undef, T+steps_ahead, T - 1) - for t in 1:T+steps_ahead - ξ_matrix[t, :] = t < T ? vcat(ones(t-1), zeros(T-t)) : ones(T-1) - end - - return ξ_matrix[:, 1:end-1] -end - -""" -create_ζ(T::Int, steps_ahead::Int, ζ_ω_threshold::Int)::Matrix - - Creates a matrix of innovations ζ based on the input sizes, and the desired steps ahead (this is necessary for the forecast function). - - # Arguments - - `T::Int`: Length of the original time series. - - `steps_ahead::Int`: Number of steps ahead (for estimation purposes this should be set at 0). - - `ζ_ω_threshold::Int`: Stabilize parameter ζ. - - # Returns - - `Matrix`: Matrix of innovations ζ constructed based on the input sizes. - -""" -function create_ζ(T::Int, steps_ahead::Int, ζ_ω_threshold::Int)::Matrix - ζ_matrix = Matrix{Float64}(undef, T+steps_ahead, T - 2) - for t in 1:T+steps_ahead - ζ_matrix[t, :] = t < T ? vcat(collect(t-2:-1:1), zeros(T-2-length(collect(t-2:-1:1)))) : collect(t-2:-1:t-T+1) - end - return ζ_matrix[:, 1:end-ζ_ω_threshold] -end - -""" -create_ω(T::Int, s::Int, steps_ahead::Int)::Matrix - - Creates a matrix of innovations ω based on the input sizes, and the desired steps ahead (this is necessary for the forecast function). 
- - # Arguments - - `T::Int`: Length of the original time series. - - `freq_seasonal::Int`: Seasonal period. - - `steps_ahead::Int`: Number of steps ahead (for estimation purposes this should be set at 0). - - # Returns - - `Matrix`: Matrix of innovations ω constructed based on the input sizes. - -""" -function create_ω(T::Int, freq_seasonal::Int, steps_ahead::Int, ζ_ω_threshold::Int)::Matrix - ω_matrix_size = ω_size(T, freq_seasonal, ζ_ω_threshold) + ζ_ω_threshold - ω_matrix = zeros(T+steps_ahead, ω_matrix_size) - for t in freq_seasonal+1:T+steps_ahead - ωₜ_coefs = zeros(ω_matrix_size) - Mₜ = Int(floor((t-1)/freq_seasonal)) - lag₁ = [t - j*freq_seasonal for j in 0:Mₜ-1] - lag₂ = [t - j*freq_seasonal - 1 for j in 0:Mₜ-1] - ωₜ_coefs[lag₁[lag₁.<=ω_matrix_size+(freq_seasonal - 1)] .- (freq_seasonal - 1)] .= 1 - ωₜ_coefs[lag₂[0 .< lag₂ .<=ω_matrix_size+(freq_seasonal - 1)] .- (freq_seasonal - 1)] .= -1 - ω_matrix[t, :] = ωₜ_coefs - end - return ω_matrix[:, 1:end-ζ_ω_threshold] -end - -""" - create_initial_states_Matrix(T::Int, s::Int, steps_ahead::Int, level::Bool, trend::Bool, seasonal::Bool)::Matrix - - Creates an initial states matrix based on the input parameters. - - # Arguments - - `T::Int`: Length of the original time series. - - `freq_seasonal::Int`: Seasonal period. - - `steps_ahead::Int`: Number of steps ahead. - - `level::Bool`: Flag for considering level component. - - `trend::Bool`: Flag for considering trend component. - - `seasonal::Bool`: Flag for considering seasonal component. - - # Returns - - `Matrix`: Initial states matrix constructed based on the input parameters. - -""" -function create_initial_states_Matrix(T::Int, freq_seasonal::Int, steps_ahead::Int, level::Bool, trend::Bool, seasonal::Bool)::Matrix - - initial_states_matrix = zeros(T+steps_ahead, 0) - level ? initial_states_matrix = hcat(initial_states_matrix, ones(T+steps_ahead, 1)) : nothing - trend ? initial_states_matrix = hcat(initial_states_matrix, vcat([0], collect(1:T+steps_ahead-1))) : nothing - - if seasonal - γ1_matrix = zeros(T+steps_ahead, freq_seasonal) - for t in 1:T+steps_ahead - γ1_matrix[t, t % freq_seasonal == 0 ? freq_seasonal : t % freq_seasonal] = 1.0 - end - return hcat(initial_states_matrix, γ1_matrix) - end - - return initial_states_matrix - -end - -""" -create_X(model_input::Dict, Exogenous_X::Matrix{Fl}, steps_ahead::Int=0, Exogenous_Forecast::Matrix{Fl}=zeros(steps_ahead, size(Exogenous_X, 2))) where Fl - - Creates the StateSpaceLearning matrix X based on the model type and input parameters. - - # Arguments - - `model_type::String`: Type of model. - - `Exogenous_X::Matrix{Fl}`: Exogenous variables matrix. - - `steps_ahead::Int`: Number of steps ahead (default: 0). - - `Exogenous_Forecast::Matrix{Fl}`: Exogenous variables forecast matrix (default: zeros). - - # Returns - - `Matrix`: StateSpaceLearning matrix X constructed based on the input parameters. -""" -function create_X(model_input::Dict, Exogenous_X::Matrix{Fl}, - steps_ahead::Int=0, Exogenous_Forecast::Matrix{Fl}=zeros(steps_ahead, size(Exogenous_X, 2))) where Fl - - outlier = model_input["outlier"]; ζ_ω_threshold = model_input["ζ_ω_threshold"]; T = size(Exogenous_X, 1) - - ξ_matrix = model_input["stochastic_level"] ? create_ξ(T, steps_ahead) : zeros(T+steps_ahead, 0) - ζ_matrix = model_input["stochastic_trend"] ? create_ζ(T, steps_ahead, ζ_ω_threshold) : zeros(T+steps_ahead, 0) - ω_matrix = model_input["stochastic_seasonal"] ? 
create_ω(T, model_input["freq_seasonal"], steps_ahead, ζ_ω_threshold) : zeros(T+steps_ahead, 0) - o_matrix = outlier ? create_o_matrix(T, steps_ahead) : zeros(T+steps_ahead, 0) - - initial_states_matrix = create_initial_states_Matrix(T, model_input["freq_seasonal"], steps_ahead, model_input["level"], model_input["trend"], model_input["seasonal"]) - return hcat(initial_states_matrix, ξ_matrix, ζ_matrix, ω_matrix, o_matrix, vcat(Exogenous_X, Exogenous_Forecast)) - -end - -""" -get_components_indexes(Exogenous_X::Matrix{Fl}, model_input::Dict)::Dict where Fl - - Generates indexes dict for different components based on the model type and input parameters. - - # Arguments - - `Exogenous_X::Matrix{Fl}`: Exogenous variables matrix. - - `model_input`: Dictionary containing the model input parameters. - - # Returns - - `Dict`: Dictionary containing the corresponding indexes for each component of the model. - -""" -function get_components_indexes(Exogenous_X::Matrix{Fl}, model_input::Dict)::Dict where Fl - - outlier = model_input["outlier"]; ζ_ω_threshold = model_input["ζ_ω_threshold"]; T = size(Exogenous_X, 1) - - FINAL_INDEX = 0 - - if model_input["level"] - μ1_indexes = [1] - initial_states_indexes = [1] - FINAL_INDEX += length(μ1_indexes) - else - μ1_indexes = Int[] - initial_states_indexes = Int[] - end - - if model_input["trend"] - ν1_indexes = [2] - initial_states_indexes = vcat(initial_states_indexes, ν1_indexes) - FINAL_INDEX += length(ν1_indexes) - else - ν1_indexes = Int[] - end - - if model_input["seasonal"] - γ1_indexes = collect(FINAL_INDEX+1:FINAL_INDEX+model_input["freq_seasonal"]) - initial_states_indexes = vcat(initial_states_indexes, γ1_indexes) - FINAL_INDEX += length(γ1_indexes) - else - γ1_indexes = Int[] - end - - if model_input["stochastic_level"] - ξ_indexes = collect(FINAL_INDEX+1:FINAL_INDEX+ξ_size(T)) - FINAL_INDEX += length(ξ_indexes) - else - ξ_indexes = Int[] - end - - if model_input["stochastic_trend"] - ζ_indexes = collect(FINAL_INDEX+1:FINAL_INDEX+ζ_size(T, ζ_ω_threshold)) - FINAL_INDEX += length(ζ_indexes) - else - ζ_indexes = Int[] - end - - if model_input["stochastic_seasonal"] - ω_indexes = collect(FINAL_INDEX+1:FINAL_INDEX+ω_size(T, model_input["freq_seasonal"], ζ_ω_threshold)) - FINAL_INDEX += length(ω_indexes) - else - ω_indexes = Int[] - end - - if outlier - o_indexes = collect(FINAL_INDEX+1:FINAL_INDEX+o_size(T)) - FINAL_INDEX += length(o_indexes) - else - o_indexes = Int[] - end - - exogenous_indexes = collect(FINAL_INDEX + 1:FINAL_INDEX + size(Exogenous_X, 2)) - - return Dict("μ1" => μ1_indexes, "ν1" => ν1_indexes, "γ1" => γ1_indexes, - "ξ" => ξ_indexes, "ζ" => ζ_indexes, "ω" => ω_indexes, "o" => o_indexes, - "Exogenous_X" => exogenous_indexes, "initial_states" => initial_states_indexes) -end - -""" - get_variances(ε::Vector{Fl}, coefs::Vector{Fl}, components_indexes::Dict{String, Vector{Int}})::Dict where Fl - - Calculates variances for each innovation component and for the residuals. - - # Arguments - - `ε::Vector{Fl}`: Vector of residuals. - - `coefs::Vector{Fl}`: Vector of coefficients. - - `components_indexes::Dict{String, Vector{Int}}`: Dictionary containing indexes for different components. - - # Returns - - `Dict`: Dictionary containing variances for each innovation component. - -""" -function get_variances(ε::Vector{Fl}, coefs::Vector{Fl}, components_indexes::Dict{String, Vector{Int}})::Dict where Fl - - variances = Dict() - for component in ["ξ", "ζ", "ω"] - !isempty(components_indexes[component]) ? 
variances[component] = var(coefs[components_indexes[component]]) : nothing - end - variances["ε"] = var(ε) - return variances -end \ No newline at end of file diff --git a/src/models/structural_model.jl b/src/models/structural_model.jl new file mode 100644 index 0000000..7d10a3c --- /dev/null +++ b/src/models/structural_model.jl @@ -0,0 +1,397 @@ +mutable struct StructuralModel <: StateSpaceLearningModel + y::Vector + X::Matrix + level::Bool + stochastic_level::Bool + trend::Bool + stochastic_trend::Bool + seasonal::Bool + stochastic_seasonal::Bool + freq_seasonal::Int + outlier::Bool + ζ_ω_threshold::Int + n_exogenous::Int + output::Union{Output,Nothing} + + function StructuralModel(y::Vector{Fl}; + level::Bool=true, stochastic_level::Bool=true, + trend::Bool=true, stochastic_trend::Bool=true, + seasonal::Bool=true, stochastic_seasonal::Bool=true, + freq_seasonal::Int=12, outlier::Bool=true, + ζ_ω_threshold::Int=12, + Exogenous_X::Matrix{Fl}=zeros(length(y), 0)) where {Fl} + n_exogenous = size(Exogenous_X, 2) + @assert !has_intercept(Exogenous_X) "Exogenous matrix must not have an intercept column" + @assert seasonal ? length(y) > freq_seasonal : true "Time series must be longer than the seasonal period" + + X = create_X(level, stochastic_level, trend, stochastic_trend, seasonal, + stochastic_seasonal, freq_seasonal, outlier, ζ_ω_threshold, + Exogenous_X) + + return new(y, X, level, stochastic_level, trend, stochastic_trend, seasonal, + stochastic_seasonal, freq_seasonal, outlier, ζ_ω_threshold, n_exogenous, + nothing) + end +end + +""" + ξ_size(T::Int)::Int + + Calculates the size of ξ innovation matrix based on the input T. + + # Arguments + - `T::Int`: Length of the original time series. + + # Returns + - `Int`: Size of ξ calculated from T. + +""" +ξ_size(T::Int)::Int = T - 2 + +""" +ζ_size(T::Int, ζ_ω_threshold::Int)::Int + + Calculates the size of ζ innovation matrix based on the input T. + + # Arguments + - `T::Int`: Length of the original time series. + - `ζ_ω_threshold::Int`: Stabilize parameter ζ. + + # Returns + - `Int`: Size of ζ calculated from T. + +""" +ζ_size(T::Int, ζ_ω_threshold::Int)::Int = T - ζ_ω_threshold - 2 + +""" +ω_size(T::Int, s::Int)::Int + + Calculates the size of ω innovation matrix based on the input T. + + # Arguments + - `T::Int`: Length of the original time series. + - `s::Int`: Seasonal period. + + # Returns + - `Int`: Size of ω calculated from T. + +""" +ω_size(T::Int, s::Int, ζ_ω_threshold::Int)::Int = T - ζ_ω_threshold - s + 1 + +""" + create_ξ(T::Int, steps_ahead::Int)::Matrix + + Creates a matrix of innovations ξ based on the input sizes, and the desired steps ahead (this is necessary for the forecast function) + + # Arguments + - `T::Int`: Length of the original time series. + - `steps_ahead::Int`: Number of steps ahead (for estimation purposes this should be set at 0). + + # Returns + - `Matrix`: Matrix of innovations ξ constructed based on the input sizes. + +""" +function create_ξ(T::Int, steps_ahead::Int)::Matrix + ξ_matrix = Matrix{Float64}(undef, T + steps_ahead, T - 1) + for t in 1:(T + steps_ahead) + ξ_matrix[t, :] = t < T ? vcat(ones(t - 1), zeros(T - t)) : ones(T - 1) + end + + return ξ_matrix[:, 1:(end - 1)] +end + +""" +create_ζ(T::Int, steps_ahead::Int, ζ_ω_threshold::Int)::Matrix + + Creates a matrix of innovations ζ based on the input sizes, and the desired steps ahead (this is necessary for the forecast function). + + # Arguments + - `T::Int`: Length of the original time series. 
+ - `steps_ahead::Int`: Number of steps ahead (for estimation purposes this should be set at 0). + - `ζ_ω_threshold::Int`: Stabilize parameter ζ. + + # Returns + - `Matrix`: Matrix of innovations ζ constructed based on the input sizes. + +""" +function create_ζ(T::Int, steps_ahead::Int, ζ_ω_threshold::Int)::Matrix + ζ_matrix = Matrix{Float64}(undef, T + steps_ahead, T - 2) + for t in 1:(T + steps_ahead) + ζ_matrix[t, :] = t < T ? + vcat(collect((t - 2):-1:1), + zeros(T - 2 - length(collect((t - 2):-1:1)))) : + collect((t - 2):-1:(t - T + 1)) + end + return ζ_matrix[:, 1:(end - ζ_ω_threshold)] +end + +""" +create_ω(T::Int, s::Int, steps_ahead::Int)::Matrix + + Creates a matrix of innovations ω based on the input sizes, and the desired steps ahead (this is necessary for the forecast function). + + # Arguments + - `T::Int`: Length of the original time series. + - `freq_seasonal::Int`: Seasonal period. + - `steps_ahead::Int`: Number of steps ahead (for estimation purposes this should be set at 0). + + # Returns + - `Matrix`: Matrix of innovations ω constructed based on the input sizes. + +""" +function create_ω(T::Int, freq_seasonal::Int, steps_ahead::Int, ζ_ω_threshold::Int)::Matrix + ω_matrix_size = ω_size(T, freq_seasonal, ζ_ω_threshold) + ζ_ω_threshold + ω_matrix = zeros(T + steps_ahead, ω_matrix_size) + for t in (freq_seasonal + 1):(T + steps_ahead) + ωₜ_coefs = zeros(ω_matrix_size) + Mₜ = Int(floor((t - 1) / freq_seasonal)) + lag₁ = [t - j * freq_seasonal for j in 0:(Mₜ - 1)] + lag₂ = [t - j * freq_seasonal - 1 for j in 0:(Mₜ - 1)] + ωₜ_coefs[lag₁[lag₁ .<= ω_matrix_size + (freq_seasonal - 1)] .- (freq_seasonal - 1)] .= 1 + ωₜ_coefs[lag₂[0 .< lag₂ .<= ω_matrix_size + (freq_seasonal - 1)] .- (freq_seasonal - 1)] .= -1 + ω_matrix[t, :] = ωₜ_coefs + end + return ω_matrix[:, 1:(end - ζ_ω_threshold)] +end + +""" + create_initial_states_Matrix(T::Int, s::Int, steps_ahead::Int, level::Bool, trend::Bool, seasonal::Bool)::Matrix + + Creates an initial states matrix based on the input parameters. + + # Arguments + - `T::Int`: Length of the original time series. + - `freq_seasonal::Int`: Seasonal period. + - `steps_ahead::Int`: Number of steps ahead. + - `level::Bool`: Flag for considering level component. + - `trend::Bool`: Flag for considering trend component. + - `seasonal::Bool`: Flag for considering seasonal component. + + # Returns + - `Matrix`: Initial states matrix constructed based on the input parameters. + +""" +function create_initial_states_Matrix(T::Int, freq_seasonal::Int, steps_ahead::Int, + level::Bool, trend::Bool, seasonal::Bool)::Matrix + initial_states_matrix = zeros(T + steps_ahead, 0) + level ? initial_states_matrix = hcat(initial_states_matrix, ones(T + steps_ahead, 1)) : + nothing + trend ? + initial_states_matrix = hcat(initial_states_matrix, + vcat([0], collect(1:(T + steps_ahead - 1)))) : nothing + + if seasonal + γ1_matrix = zeros(T + steps_ahead, freq_seasonal) + for t in 1:(T + steps_ahead) + γ1_matrix[t, t % freq_seasonal == 0 ? freq_seasonal : t % freq_seasonal] = 1.0 + end + return hcat(initial_states_matrix, γ1_matrix) + end + + return initial_states_matrix +end + +""" +create_X(level::Bool, stochastic_level::Bool, trend::Bool, stochastic_trend::Bool, + seasonal::Bool, stochastic_seasonal::Bool, freq_seasonal::Int, outlier::Bool, ζ_ω_threshold::Int, Exogenous_X::Matrix{Fl}, + steps_ahead::Int=0, Exogenous_Forecast::Matrix{Fl}=zeros(steps_ahead, size(Exogenous_X, 2))) where Fl + + Creates the StateSpaceLearning matrix X based on the model type and input parameters. 
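+
+    For example (illustrative sizes; with the default steps_ahead = 0 the matrix
+    has one row per observation):
+
+        X = create_X(true, true, true, true, true, true, 12, true, 12, zeros(60, 0))
+        size(X, 1) == 60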
+ + # Arguments + - `level::Bool`: Flag for considering level component. + - `stochastic_level::Bool`: Flag for considering stochastic level component. + - `trend::Bool`: Flag for considering trend component. + - `stochastic_trend::Bool`: Flag for considering stochastic trend component. + - `seasonal::Bool`: Flag for considering seasonal component. + - `stochastic_seasonal::Bool`: Flag for considering stochastic seasonal component. + - `freq_seasonal::Int`: Seasonal period. + - `outlier::Bool`: Flag for considering outlier component. + - `ζ_ω_threshold::Int`: Stabilize parameter ζ. + - `Exogenous_X::Matrix{Fl}`: Exogenous variables matrix. + - `steps_ahead::Int`: Number of steps ahead (default: 0). + - `Exogenous_Forecast::Matrix{Fl}`: Exogenous variables forecast matrix (default: zeros). + + # Returns + - `Matrix`: StateSpaceLearning matrix X constructed based on the input parameters. +""" +function create_X(level::Bool, stochastic_level::Bool, trend::Bool, stochastic_trend::Bool, + seasonal::Bool, stochastic_seasonal::Bool, freq_seasonal::Int, + outlier::Bool, ζ_ω_threshold::Int, Exogenous_X::Matrix{Fl}, + steps_ahead::Int=0, + Exogenous_Forecast::Matrix{Fl}=zeros(steps_ahead, size(Exogenous_X, 2))) where {Fl} + T = size(Exogenous_X, 1) + + ξ_matrix = stochastic_level ? create_ξ(T, steps_ahead) : zeros(T + steps_ahead, 0) + ζ_matrix = stochastic_trend ? create_ζ(T, steps_ahead, ζ_ω_threshold) : + zeros(T + steps_ahead, 0) + ω_matrix = stochastic_seasonal ? + create_ω(T, freq_seasonal, steps_ahead, ζ_ω_threshold) : + zeros(T + steps_ahead, 0) + o_matrix = outlier ? create_o_matrix(T, steps_ahead) : zeros(T + steps_ahead, 0) + + initial_states_matrix = create_initial_states_Matrix(T, freq_seasonal, steps_ahead, + level, trend, seasonal) + return hcat(initial_states_matrix, ξ_matrix, ζ_matrix, ω_matrix, o_matrix, + vcat(Exogenous_X, Exogenous_Forecast)) +end + +""" +function get_components_indexes(model::StructuralModel)::Dict where Fl + + Generates indexes dict for different components based on the model type and input parameters. + + # Arguments + - `model::StructuralModel`: StructuralModel object. + + # Returns + - `Dict`: Dictionary containing the corresponding indexes for each component of the model. 
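+
+    # Example
+    An illustrative call (the exact index ranges depend on T, freq_seasonal and
+    ζ_ω_threshold):
+
+        model = StructuralModel(rand(50))
+        idx = get_components_indexes(model)
+        keys(idx)  # "μ1", "ν1", "γ1", "ξ", "ζ", "ω", "o", "Exogenous_X", "initial_states"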
+ +""" +function get_components_indexes(model::StructuralModel)::Dict + T = length(model.y) + + FINAL_INDEX = 0 + + if model.level + μ1_indexes = [1] + initial_states_indexes = [1] + FINAL_INDEX += length(μ1_indexes) + else + μ1_indexes = Int[] + initial_states_indexes = Int[] + end + + if model.trend + ν1_indexes = [2] + initial_states_indexes = vcat(initial_states_indexes, ν1_indexes) + FINAL_INDEX += length(ν1_indexes) + else + ν1_indexes = Int[] + end + + if model.seasonal + γ1_indexes = collect((FINAL_INDEX + 1):(FINAL_INDEX + model.freq_seasonal)) + initial_states_indexes = vcat(initial_states_indexes, γ1_indexes) + FINAL_INDEX += length(γ1_indexes) + else + γ1_indexes = Int[] + end + + if model.stochastic_level + ξ_indexes = collect((FINAL_INDEX + 1):(FINAL_INDEX + ξ_size(T))) + FINAL_INDEX += length(ξ_indexes) + else + ξ_indexes = Int[] + end + + if model.stochastic_trend + ζ_indexes = collect((FINAL_INDEX + 1):(FINAL_INDEX + ζ_size(T, model.ζ_ω_threshold))) + FINAL_INDEX += length(ζ_indexes) + else + ζ_indexes = Int[] + end + + if model.stochastic_seasonal + ω_indexes = collect((FINAL_INDEX + 1):(FINAL_INDEX + ω_size(T, model.freq_seasonal, + model.ζ_ω_threshold))) + FINAL_INDEX += length(ω_indexes) + else + ω_indexes = Int[] + end + + if model.outlier + o_indexes = collect((FINAL_INDEX + 1):(FINAL_INDEX + o_size(T))) + FINAL_INDEX += length(o_indexes) + else + o_indexes = Int[] + end + + exogenous_indexes = collect((FINAL_INDEX + 1):(FINAL_INDEX + model.n_exogenous)) + + return Dict("μ1" => μ1_indexes, "ν1" => ν1_indexes, "γ1" => γ1_indexes, + "ξ" => ξ_indexes, "ζ" => ζ_indexes, "ω" => ω_indexes, "o" => o_indexes, + "Exogenous_X" => exogenous_indexes, + "initial_states" => initial_states_indexes) +end + +""" + function get_variances(model::StructuralModel, ε::Vector{Fl}, coefs::Vector{Fl}, components_indexes::Dict{String, Vector{Int}})::Dict where Fl + + Calculates variances for each innovation component and for the residuals. + + # Arguments + - `model::StructuralModel`: StructuralModel object. + - `ε::Vector{Fl}`: Vector of residuals. + - `coefs::Vector{Fl}`: Vector of coefficients. + - `components_indexes::Dict{String, Vector{Int}}`: Dictionary containing indexes for different components. + + # Returns + - `Dict`: Dictionary containing variances for each innovation component. + +""" +function get_variances(model::StructuralModel, ε::Vector{Fl}, coefs::Vector{Fl}, + components_indexes::Dict{String,Vector{Int}})::Dict where {Fl} + model_innovations = get_model_innovations(model) + + variances = Dict() + for component in model_innovations + variances[component] = var(coefs[components_indexes[component]]) + end + variances["ε"] = var(ε) + return variances +end + +""" + get_model_innovations(model::StructuralModel)::Vector + + Returns a vector containing the model innovations based on the input parameters. + + # Arguments + - `model::StructuralModel`: StructuralModel object. + + # Returns + - `Vector`: Vector containing the model innovations. + +""" +function get_model_innovations(model::StructuralModel) + model_innovations = String[] + if model.stochastic_level + push!(model_innovations, "ξ") + end + + if model.stochastic_trend + push!(model_innovations, "ζ") + end + + if model.stochastic_seasonal + push!(model_innovations, "ω") + end + return model_innovations +end + +""" + get_innovation_functions(model::StructuralModel, innovation::String)::Function + + Returns the innovation function based on the input innovation string. 
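+
+    For instance, for the level innovation ξ the returned matrix extends the
+    in-sample ξ matrix one step past the simulation horizon (illustrative check):
+
+        model = StructuralModel(rand(50))
+        X_ξ = get_innovation_simulation_X(model, "ξ", 12)
+        size(X_ξ, 1) == 63  # length(model.y) + steps_ahead + 1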
+
+    # Arguments
+    - `model::StructuralModel`: StructuralModel object.
+    - `innovation::String`: Innovation string.
+    - `steps_ahead::Int`: Number of steps ahead.
+
+    # Returns
+    - `Matrix`: Innovation simulation matrix for the given innovation.
+
+"""
+function get_innovation_simulation_X(model::StructuralModel, innovation::String,
+                                     steps_ahead::Int)
+    if innovation == "ξ"
+        return create_ξ(length(model.y) + steps_ahead + 1, 0)
+    elseif innovation == "ζ"
+        return create_ζ(length(model.y) + steps_ahead + 1, 0, 1)
+    elseif innovation == "ω"
+        return create_ω(length(model.y) + steps_ahead + 1, model.freq_seasonal, 0, 1)
+    end
+end
+
+isfitted(model::StructuralModel) = !isnothing(model.output)
diff --git a/src/structs.jl b/src/structs.jl
index 5ed9540..c349ff6 100644
--- a/src/structs.jl
+++ b/src/structs.jl
@@ -4,29 +4,18 @@ A mutable struct to store various components and results of a model estimation.
 # Fields
-    - `model_input::Dict`: Dictionary containing the model input parameters.
-    - `Create_X::Function`: Function used to create the StateSpaceLearning Matrix.
-    - `X::Matrix`: StateSpaceLearning Matrix data used in the model.
     - `coefs::Vector`: Coefficients obtained from the model.
     - `ε::Vector`: Residuals of the model.
     - `fitted::Vector`: Fitted values from the model.
-    - `components::Dict`: Dictionary containing different components.
     - `residuals_variances::Dict`: Dictionary storing variances of residuals for different components.
-    - `T::Int`: Integer representing a parameter 'T'.
-    - `outlier::Bool`: Boolean indicating the presence of outlier component.
     - `valid_indexes::Vector{Int}`: Vector containing valid indexes (non NaN) of the time series.
-    - `ζ_ω_threshold::Int`: ζ_ω_threshold parameter.
-    - `y::Vector{Fl}`: Vector of data.
-
+    - `components::Dict`: Dictionary containing components of the model.
 """
-mutable struct Output
-    model_input::Dict
-    Create_X::Function
-    X::Matrix
+mutable struct Output
     coefs::Vector
     ε::Vector
     fitted::Vector
-    components::Dict
     residuals_variances::Dict
     valid_indexes::Vector{Int}
-end
\ No newline at end of file
+    components::Dict
+end
diff --git a/src/utils.jl b/src/utils.jl
index 20e970c..34696ed 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -15,16 +15,19 @@
     - "Values": Values computed from `X` and component coefficients.
 """
-function build_components(X::Matrix{Tl}, coefs::Vector{Float64}, components_indexes::Dict{String, Vector{Int}})::Dict where Tl
+function build_components(X::Matrix{Tl}, coefs::Vector{Float64},
+                          components_indexes::Dict{String,Vector{Int}})::Dict where {Tl}
     components = Dict()
     for key in keys(components_indexes)
         components[key] = Dict()
-        components[key]["Coefs"] = coefs[components_indexes[key]]
+        components[key]["Coefs"] = coefs[components_indexes[key]]
         components[key]["Indexes"] = components_indexes[key]
-        components[key]["Values"] = X[:, components_indexes[key]]*coefs[components_indexes[key]]
+        components[key]["Values"] = X[:, components_indexes[key]] *
+                                    coefs[components_indexes[key]]
     end
     if haskey(components, "Exogenous_X")
-        components["Exogenous_X"]["Selected"] = findall(i -> i != 0, components["Exogenous_X"]["Coefs"])
+        components["Exogenous_X"]["Selected"] = findall(i -> i != 0,
+                                                        components["Exogenous_X"]["Coefs"])
     end
     return components
 end
@@ -47,9 +50,12 @@ get_fit_and_residuals(estimation_ε::Vector{Float64}, coefs::Vector{Float64}, X:
 - `fitted::Vector{Float64}`: Vector of fitted values computed from input data and coefficients.
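+
+A small sketch of the padding behavior (hypothetical values):
+```julia
+X = rand(5, 2); coefs = rand(2)
+valid_indexes = [1, 2, 4, 5]  # observation 3 is treated as missing in this sketch
+ε, fitted = get_fit_and_residuals(rand(4), coefs, X, valid_indexes, 5)
+isnan(ε[3])  # true: residuals stay NaN at invalid indexes, fitted values do not
+```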
""" -function get_fit_and_residuals(estimation_ε::Vector{Float64}, coefs::Vector{Float64}, X::Matrix{Tl}, valid_indexes::Vector{Int}, T::Int)::Tuple{Vector{Float64}, Vector{Float64}} where Tl - ε = fill(NaN, T); ε[valid_indexes] = estimation_ε - fitted = X*coefs +function get_fit_and_residuals(estimation_ε::Vector{Float64}, coefs::Vector{Float64}, + X::Matrix{Tl}, valid_indexes::Vector{Int}, + T::Int)::Tuple{Vector{Float64},Vector{Float64}} where {Tl} + ε = fill(NaN, T) + ε[valid_indexes] = estimation_ε + fitted = X * coefs return ε, fitted end @@ -99,10 +105,12 @@ handle_missing_values(X::Matrix{Tl}, y::Vector{Fl}) -> Tuple{Vector{Fl}, Matrix{ - `X::Matrix{Tl}`: Input matrix without missing values. - `valid_indexes::Vector{Int}`: Vector containing valid indexes of the time series. """ -function handle_missing_values(X::Matrix{Tl}, y::Vector{Fl})::Tuple{Vector{Fl}, Matrix{Tl}, Vector{Int}} where {Tl, Fl} - - invalid_indexes = unique(vcat([i[1] for i in findall(i -> any(isnan, i), X)], findall(i -> isnan(i), y))) - valid_indexes = setdiff(1:length(y), invalid_indexes) +function handle_missing_values(X::Matrix{Tl}, + y::Vector{Fl})::Tuple{Vector{Fl},Matrix{Tl}, + Vector{Int}} where {Tl,Fl} + invalid_indexes = unique(vcat([i[1] for i in findall(i -> any(isnan, i), X)], + findall(i -> isnan(i), y))) + valid_indexes = setdiff(1:length(y), invalid_indexes) return y[valid_indexes], X[valid_indexes, :], valid_indexes end @@ -118,26 +126,28 @@ has_intercept(X::Matrix{Tl})::Bool where Tl # Returns - `Bool`: True if the input matrix has a constant column, false otherwise. """ -function has_intercept(X::Matrix{Tl})::Bool where Tl +function has_intercept(X::Matrix{Tl})::Bool where {Tl} return any([all(X[:, i] .== 1) for i in 1:size(X, 2)]) end """ -fill_innovation_coefs(component::String, output::Output)::Vector{Float64} +fill_innovation_coefs(model::StructuralModel, T::Int, component::String)::Vector{Float64} Build the innovation coefficients for a given component with same length as the original time series and coefficients attributed to the first observation they are associated with. # Arguments + - `model::StructuralModel`: Structural model. + - `T::Int`: Length of the original time series. - `component::String`: Component name. - - `output::Output`: Output object obtained from model fitting. # Returns - `Vector{Float64}`: Vector containing innovation coefficients for the given component. 
""" -function fill_innovation_coefs(T::Int, component::String, output::Output)::Vector{Float64} +function fill_innovation_coefs(model::StructuralModel, component::String)::Vector{Float64} + T = length(model.y) inov_comp = zeros(T) - for (i, idx) in enumerate(output.components[component]["Indexes"]) - inov_comp[findfirst(i -> i != 0, output.X[:, idx])] = output.components[component]["Coefs"][i] + for (i, idx) in enumerate(model.output.components[component]["Indexes"]) + inov_comp[findfirst(i -> i != 0, model.X[:, idx])] = model.output.components[component]["Coefs"][i] end return inov_comp -end \ No newline at end of file +end diff --git a/test/StateSpaceLearning.jl b/test/StateSpaceLearning.jl deleted file mode 100644 index c369d92..0000000 --- a/test/StateSpaceLearning.jl +++ /dev/null @@ -1,65 +0,0 @@ -@testset "Function: fit_model" begin - y1 = rand(100) - y2 = rand(100); y2[10:20] .= NaN - - output1 = StateSpaceLearning.fit_model(y1) - @test length(output1.ε) == 100 - @test length(output1.fitted) == 100 - @test size(output1.X, 1) == 100 - @test size(output1.X, 2) == length(output1.coefs) - - output2 = StateSpaceLearning.fit_model(y2) - @test output2.valid_indexes == setdiff(1:100, 10:20) - @test all(isnan.(output2.ε[10:20])) - @test !all(isnan.(output2.fitted[10:20])) - - output3 = StateSpaceLearning.fit_model(y1; model_input = Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 12, "outlier" => true, "ζ_ω_threshold" => 1)) - @test length(output3.coefs) - 22 == length(output1.coefs) - - @test_throws AssertionError StateSpaceLearning.fit_model(y1; model_input = Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 1000, "outlier" => true, "ζ_ω_threshold" => 1)) - - @test_throws AssertionError StateSpaceLearning.fit_model(y1; estimation_input = Dict("α" => -0.1, "information_criteria" => "aic", "ψ" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true)) - @test_throws AssertionError StateSpaceLearning.fit_model(y1; estimation_input = Dict("α" => 1.1, "information_criteria" => "aic", "ψ" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true)) - -end - -@testset "Function: forecast" begin - y1 = rand(100) - y2 = rand(100); y2[10:20] .= NaN - - output1 = StateSpaceLearning.fit_model(y1) - @test length(StateSpaceLearning.forecast(output1, 10)) == 10 - - output2 = StateSpaceLearning.fit_model(y2; Exogenous_X = rand(100, 3)) - @test length(StateSpaceLearning.forecast(output2, 10; Exogenous_Forecast = rand(10, 3))) == 10 - - @test_throws AssertionError StateSpaceLearning.forecast(output1, 10; Exogenous_Forecast = rand(5, 3)) - @test_throws AssertionError StateSpaceLearning.forecast(output2, 10) - @test_throws AssertionError StateSpaceLearning.forecast(output2, 10; Exogenous_Forecast = rand(5, 3)) - - y3 = [4.718, 4.77, 4.882, 4.859, 4.795, 4.905, 4.997, 4.997, 4.912, 4.779, 4.644, 4.77, 4.744, 4.836, 4.948, 4.905, 4.828, 5.003, 5.135, 5.135, 5.062, 4.89, 4.736, 4.941, 4.976, 5.01, 5.181, 5.093, 5.147, 5.181, 5.293, 5.293, 5.214, 5.087, 4.983, 5.111, 5.141, 5.192, 5.262, 5.198, 5.209, 5.384, 5.438, 5.488, 5.342, 5.252, 5.147, 5.267, 5.278, 5.278, 5.463, 5.459, 5.433, 5.493, 5.575, 5.605, 5.468, 5.351, 5.192, 5.303, 5.318, 5.236, 5.459, 5.424, 5.455, 5.575, 5.71, 5.68, 5.556, 5.433, 5.313, 5.433, 5.488, 5.451, 5.587, 5.594, 5.598, 5.752, 
5.897, 5.849, 5.743, 5.613, 5.468, 5.627, 5.648, 5.624, 5.758, 5.746, 5.762, 5.924, 6.023, 6.003, 5.872, 5.723, 5.602, 5.723, 5.752, 5.707, 5.874, 5.852, 5.872, 6.045, 6.142, 6.146, 6.001, 5.849, 5.72, 5.817, 5.828, 5.762, 5.891, 5.852, 5.894, 6.075, 6.196, 6.224, 6.001, 5.883, 5.736, 5.82, 5.886, 5.834, 6.006, 5.981, 6.04, 6.156, 6.306, 6.326, 6.137, 6.008, 5.891, 6.003, 6.033, 5.968, 6.037, 6.133, 6.156, 6.282, 6.432, 6.406, 6.23, 6.133, 5.966, 6.068] - output3 = StateSpaceLearning.fit_model(y3) - forecast3 = trunc.(StateSpaceLearning.forecast(output3, 18); digits = 3) - @assert forecast3 == [6.11, 6.082, 6.221, 6.19, 6.197, 6.328, 6.447, 6.44, 6.285, 6.163, 6.026, 6.142, 6.166, 6.138, 6.278, 6.246, 6.253, 6.384] - -end - -@testset "Function: simulate" begin - y1 = rand(100) - y2 = rand(100); y2[10:20] .= NaN - - output1 = StateSpaceLearning.fit_model(y1) - @test size(StateSpaceLearning.simulate(output1, 10, 100)) == (10, 100) - - output2 = StateSpaceLearning.fit_model(y2; Exogenous_X = rand(100, 3)) - @test size(StateSpaceLearning.simulate(output2, 10, 100; Exogenous_Forecast = rand(10, 3))) == (10, 100) - - y3 = [4.718, 4.77, 4.882, 4.859, 4.795, 4.905, 4.997, 4.997, 4.912, 4.779, 4.644, 4.77, 4.744, 4.836, 4.948, 4.905, 4.828, 5.003, 5.135, 5.135, 5.062, 4.89, 4.736, 4.941, 4.976, 5.01, 5.181, 5.093, 5.147, 5.181, 5.293, 5.293, 5.214, 5.087, 4.983, 5.111, 5.141, 5.192, 5.262, 5.198, 5.209, 5.384, 5.438, 5.488, 5.342, 5.252, 5.147, 5.267, 5.278, 5.278, 5.463, 5.459, 5.433, 5.493, 5.575, 5.605, 5.468, 5.351, 5.192, 5.303, 5.318, 5.236, 5.459, 5.424, 5.455, 5.575, 5.71, 5.68, 5.556, 5.433, 5.313, 5.433, 5.488, 5.451, 5.587, 5.594, 5.598, 5.752, 5.897, 5.849, 5.743, 5.613, 5.468, 5.627, 5.648, 5.624, 5.758, 5.746, 5.762, 5.924, 6.023, 6.003, 5.872, 5.723, 5.602, 5.723, 5.752, 5.707, 5.874, 5.852, 5.872, 6.045, 6.142, 6.146, 6.001, 5.849, 5.72, 5.817, 5.828, 5.762, 5.891, 5.852, 5.894, 6.075, 6.196, 6.224, 6.001, 5.883, 5.736, 5.82, 5.886, 5.834, 6.006, 5.981, 6.04, 6.156, 6.306, 6.326, 6.137, 6.008, 5.891, 6.003, 6.033, 5.968, 6.037, 6.133, 6.156, 6.282, 6.432, 6.406, 6.23, 6.133, 5.966, 6.068] - output3 = StateSpaceLearning.fit_model(y3) - Random.seed!(123) - simulate1 = trunc.(StateSpaceLearning.simulate(output3, 18, 1000); digits = 3) - Random.seed!(123) - simulate2 = trunc.(StateSpaceLearning.simulate(output3, 18, 1000; simulate_outliers = false); digits = 3) - @assert simulate1 != simulate2 - -end \ No newline at end of file diff --git a/test/estimation_procedure.jl b/test/estimation_procedure.jl new file mode 100644 index 0000000..2069c3d --- /dev/null +++ b/test/estimation_procedure.jl @@ -0,0 +1,197 @@ +Random.seed!(1234) +Estimation_X = rand(30, 3) +estimation_y = rand(30) +α = 0.5 +penalty_factor = ones(3) +@testset "Function: get_path_information_criteria" begin + intercept1 = true + intercept2 = false + + model1 = glmnet(Estimation_X, estimation_y; alpha=α, penalty_factor=penalty_factor, + intercept=intercept1, dfmax=size(Estimation_X, 2), + lambda_min_ratio=0.001) + coefs1, ε1 = StateSpaceLearning.get_path_information_criteria(model1, Estimation_X, + estimation_y, "aic"; + intercept=intercept1) + @test length(coefs1) == 4 + @test coefs1[1] != 0 + @test all(coefs1[2:end] .== 0) + @test length(ε1) == 30 + + model2 = glmnet(Estimation_X, estimation_y; alpha=α, penalty_factor=penalty_factor, + intercept=intercept2, dfmax=size(Estimation_X, 2), + lambda_min_ratio=0.001) + coefs2, ε2 = StateSpaceLearning.get_path_information_criteria(model2, Estimation_X, + estimation_y, "aic"; + 
intercept=intercept2) + @test length(coefs2) == 3 + @test all(coefs2 .== 0) + @test length(ε2) == 30 +end + +@testset "Function: fit_glmnet" begin + coefs, ε = StateSpaceLearning.fit_glmnet(Estimation_X, estimation_y, α; + information_criteria="aic", + penalty_factor=penalty_factor, intercept=true) + @test length(coefs) == 4 + @test length(ε) == 30 +end + +@testset "Function: fit_lasso" begin + Random.seed!(1234) + y = rand(10) + Exogenous_X = hcat(rand(10, 3), vcat(zeros(3), ones(1), zeros(6))) + + Basic_Structural = StateSpaceLearning.StructuralModel(y; level=true, + stochastic_level=true, trend=true, + stochastic_trend=true, + seasonal=true, + stochastic_seasonal=true, + freq_seasonal=2, outlier=true, + ζ_ω_threshold=0, + Exogenous_X=Exogenous_X) + Basic_Structural_w_level = StateSpaceLearning.StructuralModel(y; level=false, + stochastic_level=true, + trend=true, + stochastic_trend=true, + seasonal=true, + stochastic_seasonal=true, + freq_seasonal=2, + outlier=true, + ζ_ω_threshold=0, + Exogenous_X=Exogenous_X) + + components_indexes = StateSpaceLearning.get_components_indexes(Basic_Structural) + components_indexes2 = StateSpaceLearning.get_components_indexes(Basic_Structural_w_level) + + X = Basic_Structural.X + X2 = Basic_Structural_w_level.X + + coefs0, ε0 = StateSpaceLearning.fit_lasso(X, y, 0.1, "aic", true, components_indexes, + ones(size(X, 2) - 1); rm_average=true) + @test length(coefs0) == 43 + @test length(ε0) == 10 + + coefs1, ε1 = StateSpaceLearning.fit_lasso(X2, y, 0.1, "aic", true, components_indexes2, + ones(size(X2, 2)); rm_average=false) + @test length(coefs1) == 42 + @test length(ε1) == 10 + + coefs2, ε2 = StateSpaceLearning.fit_lasso(X, y, 0.1, "aic", true, components_indexes, + ones(size(X, 2) - 1); rm_average=true) + @test coefs2[1] == mean(y) + @test length(coefs2) == 43 + @test length(ε2) == 10 + + coefs3, ε3 = StateSpaceLearning.fit_lasso(X, y, 0.1, "aic", false, components_indexes, + ones(size(X, 2) - 1); rm_average=true) + @test coefs3[components_indexes["o"][4]] == 0 + @test all(coefs3[components_indexes["Exogenous_X"]] .!= 0) + @test length(coefs3) == 43 + @test length(ε3) == 10 + + coefs4, ε4 = StateSpaceLearning.fit_lasso(X, y, 0.1, "aic", true, components_indexes, + vcat(ones(1), ones(size(X, 2) - 2) .* Inf); + rm_average=true) + @test all(coefs4[3:end] .== 0) + @test length(coefs4) == 43 + @test length(ε4) == 10 +end + +@testset "Function: estimation_procedure" begin + Random.seed!(1234) + y = rand(10) + Exogenous_X = hcat(rand(10, 3), vcat(zeros(3), ones(1), zeros(6))) + + Basic_Structural = StateSpaceLearning.StructuralModel(y; level=true, + stochastic_level=true, trend=true, + stochastic_trend=true, + seasonal=true, + stochastic_seasonal=true, + freq_seasonal=2, outlier=true, + ζ_ω_threshold=0, + Exogenous_X=Exogenous_X) + Basic_Structural_w_level = StateSpaceLearning.StructuralModel(y; level=false, + stochastic_level=true, + trend=true, + stochastic_trend=true, + seasonal=true, + stochastic_seasonal=true, + freq_seasonal=2, + outlier=true, + ζ_ω_threshold=0, + Exogenous_X=Exogenous_X) + + components_indexes = StateSpaceLearning.get_components_indexes(Basic_Structural) + components_indexes2 = StateSpaceLearning.get_components_indexes(Basic_Structural_w_level) + + X = Basic_Structural.X + X2 = Basic_Structural_w_level.X + + coefs1, ε1 = StateSpaceLearning.estimation_procedure(X, y, components_indexes, 0.1, + "aic", 0.05, true, true) + @test length(coefs1) == 43 + @test length(ε1) == 10 + + coefs1, ε1 = StateSpaceLearning.estimation_procedure(X2, y, 
components_indexes2, 0.1, + "aic", 0.05, true, true) + @test length(coefs1) == 42 + @test length(ε1) == 10 + + coefs2, ε2 = StateSpaceLearning.estimation_procedure(X, y, components_indexes, 0.1, + "aic", 0.05, true, false) + @test length(coefs2) == 43 + @test length(ε2) == 10 + @test all(coefs2[components_indexes["initial_states"][2:end] .- 1] .!= 0) +end + +@testset "Function: get_dummy_indexes" begin + Exogenous_X1 = hcat(rand(10, 3), vcat(zeros(3), ones(1), zeros(6))) + Exogenous_X2 = hcat(rand(10, 3)) + + dummy_indexes1 = StateSpaceLearning.get_dummy_indexes(Exogenous_X1) + @test dummy_indexes1 == [4] + + dummy_indexes2 = StateSpaceLearning.get_dummy_indexes(Exogenous_X2) + @test dummy_indexes2 == [] +end + +@testset "Function: get_outlier_duplicate_columns" begin + Random.seed!(1234) + y = rand(10) + Exogenous_X = hcat(rand(10, 3), vcat(zeros(3), ones(1), zeros(6))) + Exogenous_X2 = rand(10, 3) + + Basic_Structural = StateSpaceLearning.StructuralModel(y; level=true, + stochastic_level=true, trend=true, + stochastic_trend=true, + seasonal=true, + stochastic_seasonal=true, + freq_seasonal=2, outlier=true, + ζ_ω_threshold=0, + Exogenous_X=Exogenous_X) + Basic_Structural_w_out = StateSpaceLearning.StructuralModel(y; level=true, + stochastic_level=true, + trend=true, + stochastic_trend=true, + seasonal=true, + stochastic_seasonal=true, + freq_seasonal=2, + outlier=true, + ζ_ω_threshold=0, + Exogenous_X=Exogenous_X2) + + components_indexes = StateSpaceLearning.get_components_indexes(Basic_Structural) + components_indexes2 = StateSpaceLearning.get_components_indexes(Basic_Structural_w_out) + + X = Basic_Structural.X + X2 = Basic_Structural_w_out.X + + outlier_duplicate_columns1 = StateSpaceLearning.get_outlier_duplicate_columns(X, + components_indexes) + @test outlier_duplicate_columns1 == [32] + + outlier_duplicate_columns2 = StateSpaceLearning.get_outlier_duplicate_columns(X2, + components_indexes2) + @test outlier_duplicate_columns2 == [] +end diff --git a/test/estimation_procedure/default_estimation_procedure.jl b/test/estimation_procedure/default_estimation_procedure.jl deleted file mode 100644 index f0edfd9..0000000 --- a/test/estimation_procedure/default_estimation_procedure.jl +++ /dev/null @@ -1,127 +0,0 @@ -Random.seed!(1234) -Estimation_X = rand(30, 3) -estimation_y = rand(30) -α = 0.5 -penalty_factor = ones(3) -@testset "Function: get_path_information_criteria" begin - intercept1 = true - intercept2 = false - - model1 = glmnet(Estimation_X, estimation_y, alpha = α, penalty_factor = penalty_factor, intercept = intercept1, dfmax=size(Estimation_X, 2), lambda_min_ratio=0.001) - coefs1, ε1 = StateSpaceLearning.get_path_information_criteria(model1, Estimation_X, estimation_y, "aic"; intercept = intercept1) - @test length(coefs1) == 4 - @test coefs1[1] != 0 - @test all(coefs1[2:end] .== 0) - @test length(ε1) == 30 - - model2 = glmnet(Estimation_X, estimation_y, alpha = α, penalty_factor = penalty_factor, intercept = intercept2, dfmax=size(Estimation_X, 2), lambda_min_ratio=0.001) - coefs2, ε2 = StateSpaceLearning.get_path_information_criteria(model2, Estimation_X, estimation_y, "aic"; intercept = intercept2) - @test length(coefs2) == 3 - @test all(coefs2 .== 0) - @test length(ε2) == 30 -end - -@testset "Function: fit_glmnet" begin - coefs, ε = StateSpaceLearning.fit_glmnet(Estimation_X, estimation_y, α; information_criteria="aic", penalty_factor=penalty_factor, intercept = true) - @test length(coefs) == 4 - @test length(ε) == 30 -end - -@testset "Function: fit_lasso" begin - 
Random.seed!(1234) - Exogenous_X = hcat(rand(10, 3), vcat(zeros(3), ones(1), zeros(6))) - Basic_Structural = Dict("level" => true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) - Basic_Structural_w_level = Dict("level" => false, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) - - components_indexes = StateSpaceLearning.get_components_indexes(Exogenous_X, Basic_Structural) - components_indexes2 = StateSpaceLearning.get_components_indexes(Exogenous_X, Basic_Structural_w_level) - - Estimation_X = StateSpaceLearning.create_X(Basic_Structural, Exogenous_X) - Estimation_X2 = StateSpaceLearning.create_X(Basic_Structural_w_level, Exogenous_X) - estimation_y = Estimation_X*rand(size(Estimation_X, 2)) + rand(10) - - coefs1, ε1 = StateSpaceLearning.fit_lasso(Estimation_X, estimation_y, 0.1, "aic", true, components_indexes, ones(size(Estimation_X, 2) - 1); rm_average = true) - @test length(coefs1) == 43 - @test length(ε1) == 10 - - coefs1, ε1 = StateSpaceLearning.fit_lasso(Estimation_X2, estimation_y, 0.1, "aic", true, components_indexes2, ones(size(Estimation_X2, 2)); rm_average = false) - @test length(coefs1) == 42 - @test length(ε1) == 10 - - coefs2, ε2 = StateSpaceLearning.fit_lasso(Estimation_X, estimation_y, 0.1, "aic", true, components_indexes, ones(size(Estimation_X, 2) - 1); rm_average = true) - @test coefs2[1] == mean(estimation_y) - @test length(coefs2) == 43 - @test length(ε2) == 10 - - coefs3, ε3 = StateSpaceLearning.fit_lasso(Estimation_X, estimation_y, 0.1, "aic", false, components_indexes, ones(size(Estimation_X, 2) - 1); rm_average = true) - @test coefs3[components_indexes["o"][4]] == 0 - @test all(coefs3[components_indexes["Exogenous_X"]] .!= 0) - @test length(coefs3) == 43 - @test length(ε3) == 10 - - coefs4, ε4 = StateSpaceLearning.fit_lasso(Estimation_X, estimation_y, 0.1, "aic", true, components_indexes, vcat(ones(1), ones(size(Estimation_X,2) - 2).*Inf); rm_average = true) - @test all(coefs4[3:end] .== 0) - @test length(coefs4) == 43 - @test length(ε4) == 10 -end - -@testset "Function: default_estimation_procedure" begin - Random.seed!(1234) - Exogenous_X = hcat(rand(10, 3), vcat(ones(3), zeros(1), ones(6))) - Basic_Structural = Dict("level" => true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) - Basic_Structural_w_level = Dict("level" => false, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) - - components_indexes = StateSpaceLearning.get_components_indexes(Exogenous_X, Basic_Structural) - components_indexes2 = StateSpaceLearning.get_components_indexes(Exogenous_X, Basic_Structural_w_level) - - Estimation_X = StateSpaceLearning.create_X(Basic_Structural, Exogenous_X) - Estimation_X2 = StateSpaceLearning.create_X(Basic_Structural_w_level, Exogenous_X) - - estimation_y = Estimation_X*rand(size(Estimation_X, 2)) + rand(10).*5 - - estimation_input1 = Dict("α" => 0.1, "information_criteria" => "aic", "ϵ" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true) - coefs1, ε1 = 
StateSpaceLearning.default_estimation_procedure(Estimation_X, estimation_y, components_indexes, estimation_input1) - @test length(coefs1) == 43 - @test length(ε1) == 10 - - estimation_input1 = Dict("α" => 0.1, "information_criteria" => "aic", "ϵ" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true) - coefs1, ε1 = StateSpaceLearning.default_estimation_procedure(Estimation_X2, estimation_y, components_indexes2, estimation_input1) - @test length(coefs1) == 42 - @test length(ε1) == 10 - - estimation_input2 = Dict("α" => 0.1, "information_criteria" => "aic", "ϵ" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => false) - coefs2, ε2 = StateSpaceLearning.default_estimation_procedure(Estimation_X, estimation_y, components_indexes, estimation_input2) - @test length(coefs2) == 43 - @test length(ε2) == 10 - @test all(coefs2[components_indexes["initial_states"][2:end] .- 1] .!= 0) -end - -@testset "Function: get_dummy_indexes" begin - Exogenous_X1 = hcat(rand(10, 3), vcat(zeros(3), ones(1), zeros(6))) - Exogenous_X2 = hcat(rand(10, 3)) - - dummy_indexes1 = StateSpaceLearning.get_dummy_indexes(Exogenous_X1) - @test dummy_indexes1 == [4] - - dummy_indexes2 = StateSpaceLearning.get_dummy_indexes(Exogenous_X2) - @test dummy_indexes2 == [] -end - -@testset "Function: get_outlier_duplicate_columns" begin - Random.seed!(1234) - Exogenous_X1 = hcat(rand(10, 3), vcat(zeros(3), ones(1), zeros(6))) - Exogenous_X2 = rand(10, 3) - - Basic_Structural = Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) - - components_indexes1 = StateSpaceLearning.get_components_indexes(Exogenous_X1, Basic_Structural) - components_indexes2 = StateSpaceLearning.get_components_indexes(Exogenous_X2, Basic_Structural) - - Estimation_X1 = StateSpaceLearning.create_X(Basic_Structural, Exogenous_X1) - outlier_duplicate_columns1 = StateSpaceLearning.get_outlier_duplicate_columns(Estimation_X1, components_indexes1) - @test outlier_duplicate_columns1 == [32] - - Estimation_X2 = StateSpaceLearning.create_X(Basic_Structural, Exogenous_X2) - outlier_duplicate_columns2 = StateSpaceLearning.get_outlier_duplicate_columns(Estimation_X2, components_indexes2) - @test outlier_duplicate_columns2 == [] -end \ No newline at end of file diff --git a/test/fit_forecast.jl b/test/fit_forecast.jl new file mode 100644 index 0000000..8cc428c --- /dev/null +++ b/test/fit_forecast.jl @@ -0,0 +1,81 @@ +@testset "Function: fit_model" begin + y1 = rand(100) + y2 = rand(100) + y2[10:20] .= NaN + + model1 = StateSpaceLearning.StructuralModel(y1) + StateSpaceLearning.fit!(model1) + + @test length(model1.output.ε) == 100 + @test length(model1.output.fitted) == 100 + @test length(model1.output.coefs) == 375 + @test length(model1.output.valid_indexes) == 100 + @test length(model1.output.residuals_variances) == 4 + @test length(keys(model1.output.components)) == 9 + + model2 = StateSpaceLearning.StructuralModel(y2) + StateSpaceLearning.fit!(model2) + + @test length(model2.output.ε) == 100 + @test length(model2.output.fitted) == 100 + @test length(model2.output.coefs) == 375 + @test length(model2.output.valid_indexes) == 89 + @test length(model2.output.residuals_variances) == 4 + @test length(keys(model2.output.components)) == 9 +end + +@testset "Function: forecast" begin + y1 = rand(100) + y2 = rand(100) + y2[10:20] .= NaN + + model1 = StateSpaceLearning.StructuralModel(y1) + 
StateSpaceLearning.fit!(model1) + @test length(StateSpaceLearning.forecast(model1, 10)) == 10 + + model2 = StateSpaceLearning.StructuralModel(y2; Exogenous_X=rand(100, 3)) + StateSpaceLearning.fit!(model2) + @test length(StateSpaceLearning.forecast(model2, 10; Exogenous_Forecast=rand(10, 3))) == + 10 + + @test_throws AssertionError StateSpaceLearning.forecast(model1, 10; + Exogenous_Forecast=rand(5, 3)) + @test_throws AssertionError StateSpaceLearning.forecast(model2, 10) + @test_throws AssertionError StateSpaceLearning.forecast(model2, 10; + Exogenous_Forecast=rand(5, 3)) + + y3 = [4.718, 4.77, 4.882, 4.859, 4.795, 4.905, 4.997, 4.997, 4.912, 4.779, 4.644, 4.77, + 4.744, 4.836, 4.948, 4.905, 4.828, 5.003, 5.135, 5.135, 5.062, 4.89, 4.736, 4.941, + 4.976, 5.01, 5.181, 5.093, 5.147, 5.181, 5.293, 5.293, 5.214, 5.087, 4.983, 5.111, + 5.141, 5.192, 5.262, 5.198, 5.209, 5.384, 5.438, 5.488, 5.342, 5.252, 5.147, + 5.267, 5.278, 5.278, 5.463, 5.459, 5.433, 5.493, 5.575, 5.605, 5.468, 5.351, + 5.192, 5.303, 5.318, 5.236, 5.459, 5.424, 5.455, 5.575, 5.71, 5.68, 5.556, 5.433, + 5.313, 5.433, 5.488, 5.451, 5.587, 5.594, 5.598, 5.752, 5.897, 5.849, 5.743, + 5.613, 5.468, 5.627, 5.648, 5.624, 5.758, 5.746, 5.762, 5.924, 6.023, 6.003, + 5.872, 5.723, 5.602, 5.723, 5.752, 5.707, 5.874, 5.852, 5.872, 6.045, 6.142, + 6.146, 6.001, 5.849, 5.72, 5.817, 5.828, 5.762, 5.891, 5.852, 5.894, 6.075, 6.196, + 6.224, 6.001, 5.883, 5.736, 5.82, 5.886, 5.834, 6.006, 5.981, 6.04, 6.156, 6.306, + 6.326, 6.137, 6.008, 5.891, 6.003, 6.033, 5.968, 6.037, 6.133, 6.156, 6.282, + 6.432, 6.406, 6.23, 6.133, 5.966, 6.068] + model3 = StateSpaceLearning.StructuralModel(y3) + StateSpaceLearning.fit!(model3) + forecast3 = trunc.(StateSpaceLearning.forecast(model3, 18); digits=3) + @assert forecast3 == + [6.11, 6.082, 6.221, 6.19, 6.197, 6.328, 6.447, 6.44, 6.285, 6.163, 6.026, + 6.142, 6.166, 6.138, 6.278, 6.246, 6.253, 6.384] +end + +@testset "Function: simulate" begin + y1 = rand(100) + y2 = rand(100) + y2[10:20] .= NaN + + model1 = StateSpaceLearning.StructuralModel(y1) + StateSpaceLearning.fit!(model1) + @test size(StateSpaceLearning.simulate(model1, 10, 100)) == (10, 100) + + model2 = StateSpaceLearning.StructuralModel(y2; Exogenous_X=rand(100, 3)) + StateSpaceLearning.fit!(model2) + @test size(StateSpaceLearning.simulate(model2, 10, 100; Exogenous_Forecast=rand(10, 3))) == + (10, 100) +end diff --git a/test/information_criteria.jl b/test/information_criteria.jl index bdd7f39..4976d89 100644 --- a/test/information_criteria.jl +++ b/test/information_criteria.jl @@ -1,11 +1,11 @@ -@testset "Function: get_information" begin +@testset "Function: get_information" begin ε = [1.1, 2.2, 3.3, 4.4, 5.5] T = 5 K = 3 - bic = StateSpaceLearning.get_information(T, K, ε; information_criteria = "bic") - aic = StateSpaceLearning.get_information(T, K, ε; information_criteria = "aic") - aicc = StateSpaceLearning.get_information(T, K, ε; information_criteria = "aicc") - @test round(bic, digits = 5) == 10.36287 - @test round(aic, digits = 5) == 11.53456 - @test round(aicc, digits = 5) == 35.53456 -end \ No newline at end of file + bic = StateSpaceLearning.get_information(T, K, ε; information_criteria="bic") + aic = StateSpaceLearning.get_information(T, K, ε; information_criteria="aic") + aicc = StateSpaceLearning.get_information(T, K, ε; information_criteria="aicc") + @test round(bic; digits=5) == 10.36287 + @test round(aic; digits=5) == 11.53456 + @test round(aicc; digits=5) == 35.53456 +end diff --git a/test/models/default_model.jl 
b/test/models/default_model.jl deleted file mode 100644 index 1c7beca..0000000 --- a/test/models/default_model.jl +++ /dev/null @@ -1,220 +0,0 @@ -@testset "Innovation matrices" begin - @test StateSpaceLearning.ξ_size(10) == 8 - @test StateSpaceLearning.ζ_size(10, 2) == 6 - @test StateSpaceLearning.ζ_size(10, 0) == 8 - @test StateSpaceLearning.ω_size(10, 2, 0) == 9 - @test StateSpaceLearning.ω_size(10, 2, 2) == 7 - @test StateSpaceLearning.o_size(10) == 10 - - X_ξ1 = StateSpaceLearning.create_ξ(5, 0) - X_ξ2 = StateSpaceLearning.create_ξ(5, 2) - - @test X_ξ1 == [0.0 0.0 0.0; - 1.0 0.0 0.0; - 1.0 1.0 0.0; - 1.0 1.0 1.0; - 1.0 1.0 1.0] - - @test X_ξ2 == [0.0 0.0 0.0; - 1.0 0.0 0.0; - 1.0 1.0 0.0; - 1.0 1.0 1.0; - 1.0 1.0 1.0; - 1.0 1.0 1.0; - 1.0 1.0 1.0] - - X_ζ1 = StateSpaceLearning.create_ζ(5, 0, 0) - X_ζ2 = StateSpaceLearning.create_ζ(5, 2, 0) - X_ζ3 = StateSpaceLearning.create_ζ(5, 2, 2) - - @test X_ζ1 == [0.0 0.0 0.0; - 0.0 0.0 0.0; - 1.0 0.0 0.0; - 2.0 1.0 0.0; - 3.0 2.0 1.0] - - @test X_ζ2 == [0.0 0.0 0.0; - 0.0 0.0 0.0; - 1.0 0.0 0.0; - 2.0 1.0 0.0; - 3.0 2.0 1.0; - 4.0 3.0 2.0; - 5.0 4.0 3.0] - - @test X_ζ3 == reshape([0.0; - 0.0; - 1.0; - 2.0; - 3.0; - 4.0; - 5.0], 7, 1) - - X_ω1 = StateSpaceLearning.create_ω(5, 2, 0, 0) - X_ω2 = StateSpaceLearning.create_ω(5, 2, 2, 0) - - @test X_ω1 == [0.0 0.0 0.0 0.0; - 0.0 0.0 0.0 0.0; - -1.0 1.0 0.0 0.0; - 0.0 -1.0 1.0 0.0; - -1.0 1.0 -1.0 1.0] - - @test X_ω2 == [0.0 0.0 0.0 0.0; - 0.0 0.0 0.0 0.0; - -1.0 1.0 0.0 0.0; - 0.0 -1.0 1.0 0.0; - -1.0 1.0 -1.0 1.0; - 0.0 -1.0 1.0 -1.0; - -1.0 1.0 -1.0 1.0] - - X_o1 = StateSpaceLearning.create_o_matrix(3, 0) - X_o2 = StateSpaceLearning.create_o_matrix(3, 2) - - @test X_o1 == Matrix(1.0 * I, 3, 3) - @test X_o2 == vcat(Matrix(1.0 * I, 3, 3), zeros(2, 3)) -end - - -@testset "Initial State Matrix" begin - X1 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 0, true, true, true) - X2 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 2, true, true, true) - - @test X1 == [1.0 0.0 1.0 0.0; - 1.0 1.0 0.0 1.0; - 1.0 2.0 1.0 0.0; - 1.0 3.0 0.0 1.0; - 1.0 4.0 1.0 0.0] - - @test X2 == [1.0 0.0 1.0 0.0; - 1.0 1.0 0.0 1.0; - 1.0 2.0 1.0 0.0; - 1.0 3.0 0.0 1.0; - 1.0 4.0 1.0 0.0; - 1.0 5.0 0.0 1.0; - 1.0 6.0 1.0 0.0] - - X3 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 0, true, true, false) - X4 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 2, true, true, false) - - @test X3 == [1.0 0.0; - 1.0 1.0; - 1.0 2.0; - 1.0 3.0; - 1.0 4.0] - - @test X4 == [1.0 0.0; - 1.0 1.0; - 1.0 2.0; - 1.0 3.0; - 1.0 4.0; - 1.0 5.0; - 1.0 6.0] - - X5 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 0, true, false, false) - X6 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 2, true, false, false) - - @test X5 == ones(5, 1) - @test X6 == ones(7, 1) - - X7 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 0, false, true, false) - X8 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 2, false, true, false) - - @test X7 == [0.0; 1.0; 2.0; 3.0; 4.0][:, :] - @test X8 == [0.0; 1.0; 2.0; 3.0; 4.0; 5.0; 6.0][:, :] -end - -@testset "Create X matrix" begin - Exogenous_X1 = rand(5, 3) - Exogenous_X2 = zeros(5, 0) - - Exogenous_forecast1 = rand(2, 3) - - param_combination = [ - Any[true, 0, 0], - Any[true, 2, 0], - Any[true, 2, 2], - Any[false, 0, 0], - Any[false, 2, 0], - Any[false, 2, 2] - ] - - size_vec1=[(5, 22), (5, 18), (7, 18), (5, 17), (5, 13), (7, 13), (5, 12), (5, 12), (7, 12), (5, 7), (5, 7), (7, 7), (5, 16), (5, 14), (7, 14), (5, 11), (5, 9), (7, 9), (5, 13), (5, 11), (7, 11), (5, 8), 
(5, 6), (7, 6)] - size_vec2=[(5, 19), (5, 15), (7, 15), (5, 14), (5, 10), (7, 10), (5, 9), (5, 9), (7, 9), (5, 4), (5, 4), (7, 4), (5, 13), (5, 11), (7, 11), (5, 8), (5, 6), (7, 6), (5, 10), (5, 8), (7, 8), (5, 5), (5, 3), (7, 3)] - counter = 1 - for model_input in [Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 12), - Dict("level"=> true, "stochastic_level" => true, "trend" => false, "stochastic_trend" => false, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 12), - Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 12), - Dict("level"=> true, "stochastic_level" => false, "trend" => true, "stochastic_trend" => true, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 12)] - for param in param_combination - model_input["outlier"] = param[1] - model_input["ζ_ω_threshold"] = param[2] - if param[3] != 0 - X1 = StateSpaceLearning.create_X(model_input, Exogenous_X1, param[3], Exogenous_forecast1) - else - X1 = StateSpaceLearning.create_X(model_input, Exogenous_X1, param[3]) - end - X2 = StateSpaceLearning.create_X(model_input, Exogenous_X2, param[3]) - @test size(X1) == size_vec1[counter] - @test size(X2) == size_vec2[counter] - counter += 1 - end - end -end - -@testset "Function: get_components" begin - Exogenous_X1 = rand(10, 3) - Exogenous_X2 = zeros(10, 0) - - Basic_Structural = Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) - Local_Level = Dict("level"=> true, "stochastic_level" => true, "trend" => false, "stochastic_trend" => false, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) - Local_Linear_Trend = Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) - parameter_combination = [ - [Basic_Structural, true, Exogenous_X1], - [Local_Level, true, Exogenous_X1], - [Local_Linear_Trend, true, Exogenous_X1], - [Basic_Structural, false, Exogenous_X1], - [Basic_Structural, true, Exogenous_X2], - ] - - for param in parameter_combination - - param[1]["outlier"] = param[2] - - components_indexes = StateSpaceLearning.get_components_indexes(param[3], param[1]) - - for key in keys(components_indexes) - if param[1] == "Basic Structural" - @test !(key in ["Exogenous_X", "o"]) ? !isempty(components_indexes[key]) : true - elseif param[1] == "Local Level" - @test !(key in ["ω", "γ₁", "ζ", "ν₁", "o", "Exogenous_X"]) ? !isempty(components_indexes[key]) : true - @test !(key in ["o", "Exogenous_X", "μ₁", "ξ", "initial_states"]) ? isempty(components_indexes[key]) : true - elseif param[1] == "Local Linear Trend" - @test !(key in ["ω", "γ₁", "o", "Exogenous_X"]) ? !isempty(components_indexes[key]) : true - @test !(key in ["μ₁", "ξ", "ζ", "ν₁", "o", "Exogenous_X", "initial_states"]) ? isempty(components_indexes[key]) : true - end - @test (param[2] && key == "o") ? 
!isempty(components_indexes[key]) : true - @test (!param[2] && key == "o") ? isempty(components_indexes[key]) : true - @test key == "Exogenous_X" ? length(components_indexes[key]) == size(param[3], 2) : true - end - end -end - -@testset "Function: get_variances" begin - Exogenous_X2 = zeros(10, 0) - - Basic_Structural = Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) - Local_Level = Dict("level"=> true, "stochastic_level" => true, "trend" => false, "stochastic_trend" => false, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) - Local_Linear_Trend = Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) - - parameter_combination = [ - [Basic_Structural, true, Exogenous_X2, ["ξ", "ζ", "ω", "ε"]], - [Local_Level, true, Exogenous_X2, ["ξ", "ε"]], - [Local_Linear_Trend, true, Exogenous_X2, ["ξ", "ζ", "ε"]] - ] - for param in parameter_combination - param[1]["outlier"] = param[2] - - components_indexes = StateSpaceLearning.get_components_indexes(param[3], param[1]) - variances = StateSpaceLearning.get_variances(rand(100), rand(39), components_indexes) - @test all([key in keys(variances) for key in param[4]]) - end -end \ No newline at end of file diff --git a/test/models/structural_model.jl b/test/models/structural_model.jl new file mode 100644 index 0000000..4a90e86 --- /dev/null +++ b/test/models/structural_model.jl @@ -0,0 +1,363 @@ +@testset "StructuralModel" begin + y1 = rand(100) + + model1 = StateSpaceLearning.StructuralModel(y1) + + @test typeof(model1) == StateSpaceLearning.StructuralModel + + @test_throws AssertionError StateSpaceLearning.StructuralModel(y1; freq_seasonal=1000) + + exog_error = ones(100, 3) + @test_throws AssertionError StateSpaceLearning.StructuralModel(y1; + Exogenous_X=exog_error) +end + +@testset "Innovation matrices" begin + @test StateSpaceLearning.ξ_size(10) == 8 + @test StateSpaceLearning.ζ_size(10, 2) == 6 + @test StateSpaceLearning.ζ_size(10, 0) == 8 + @test StateSpaceLearning.ω_size(10, 2, 0) == 9 + @test StateSpaceLearning.ω_size(10, 2, 2) == 7 + @test StateSpaceLearning.o_size(10) == 10 + + X_ξ1 = StateSpaceLearning.create_ξ(5, 0) + X_ξ2 = StateSpaceLearning.create_ξ(5, 2) + + @test X_ξ1 == [0.0 0.0 0.0; + 1.0 0.0 0.0; + 1.0 1.0 0.0; + 1.0 1.0 1.0; + 1.0 1.0 1.0] + + @test X_ξ2 == [0.0 0.0 0.0; + 1.0 0.0 0.0; + 1.0 1.0 0.0; + 1.0 1.0 1.0; + 1.0 1.0 1.0; + 1.0 1.0 1.0; + 1.0 1.0 1.0] + + X_ζ1 = StateSpaceLearning.create_ζ(5, 0, 0) + X_ζ2 = StateSpaceLearning.create_ζ(5, 2, 0) + X_ζ3 = StateSpaceLearning.create_ζ(5, 2, 2) + + @test X_ζ1 == [0.0 0.0 0.0; + 0.0 0.0 0.0; + 1.0 0.0 0.0; + 2.0 1.0 0.0; + 3.0 2.0 1.0] + + @test X_ζ2 == [0.0 0.0 0.0; + 0.0 0.0 0.0; + 1.0 0.0 0.0; + 2.0 1.0 0.0; + 3.0 2.0 1.0; + 4.0 3.0 2.0; + 5.0 4.0 3.0] + + @test X_ζ3 == reshape([0.0; + 0.0; + 1.0; + 2.0; + 3.0; + 4.0; + 5.0], 7, 1) + + X_ω1 = StateSpaceLearning.create_ω(5, 2, 0, 0) + X_ω2 = StateSpaceLearning.create_ω(5, 2, 2, 0) + + @test X_ω1 == [0.0 0.0 0.0 0.0; + 0.0 0.0 0.0 0.0; + -1.0 1.0 0.0 0.0; + 0.0 -1.0 1.0 0.0; + -1.0 1.0 -1.0 1.0] + + @test X_ω2 == [0.0 0.0 0.0 0.0; + 0.0 0.0 0.0 0.0; + -1.0 1.0 0.0 0.0; + 0.0 -1.0 1.0 0.0; + -1.0 1.0 -1.0 1.0; + 0.0 -1.0 1.0 -1.0; + -1.0 1.0 -1.0 1.0] + + 
X_o1 = StateSpaceLearning.create_o_matrix(3, 0) + X_o2 = StateSpaceLearning.create_o_matrix(3, 2) + + @test X_o1 == Matrix(1.0 * I, 3, 3) + @test X_o2 == vcat(Matrix(1.0 * I, 3, 3), zeros(2, 3)) +end + +@testset "Initial State Matrix" begin + X1 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 0, true, true, true) + X2 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 2, true, true, true) + + @test X1 == [1.0 0.0 1.0 0.0; + 1.0 1.0 0.0 1.0; + 1.0 2.0 1.0 0.0; + 1.0 3.0 0.0 1.0; + 1.0 4.0 1.0 0.0] + + @test X2 == [1.0 0.0 1.0 0.0; + 1.0 1.0 0.0 1.0; + 1.0 2.0 1.0 0.0; + 1.0 3.0 0.0 1.0; + 1.0 4.0 1.0 0.0; + 1.0 5.0 0.0 1.0; + 1.0 6.0 1.0 0.0] + + X3 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 0, true, true, false) + X4 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 2, true, true, false) + + @test X3 == [1.0 0.0; + 1.0 1.0; + 1.0 2.0; + 1.0 3.0; + 1.0 4.0] + + @test X4 == [1.0 0.0; + 1.0 1.0; + 1.0 2.0; + 1.0 3.0; + 1.0 4.0; + 1.0 5.0; + 1.0 6.0] + + X5 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 0, true, false, false) + X6 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 2, true, false, false) + + @test X5 == ones(5, 1) + @test X6 == ones(7, 1) + + X7 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 0, false, true, false) + X8 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 2, false, true, false) + + @test X7 == [0.0; 1.0; 2.0; 3.0; 4.0][:, :] + @test X8 == [0.0; 1.0; 2.0; 3.0; 4.0; 5.0; 6.0][:, :] +end + +@testset "Create X matrix" begin + Exogenous_X1 = rand(5, 3) + Exogenous_X2 = zeros(5, 0) + + Exogenous_forecast1 = rand(2, 3) + + param_combination = [Any[true, 0, 0], + Any[true, 2, 0], + Any[true, 2, 2], + Any[false, 0, 0], + Any[false, 2, 0], + Any[false, 2, 2]] + + size_vec1 = [(5, 22), (5, 18), (7, 18), (5, 17), (5, 13), (7, 13), (5, 12), (5, 12), + (7, 12), (5, 7), (5, 7), (7, 7), (5, 16), (5, 14), (7, 14), (5, 11), + (5, 9), (7, 9), (5, 13), (5, 11), (7, 11), (5, 8), (5, 6), (7, 6)] + size_vec2 = [(5, 19), (5, 15), (7, 15), (5, 14), (5, 10), (7, 10), (5, 9), (5, 9), + (7, 9), (5, 4), (5, 4), (7, 4), (5, 13), (5, 11), (7, 11), (5, 8), (5, 6), + (7, 6), (5, 10), (5, 8), (7, 8), (5, 5), (5, 3), (7, 3)] + counter = 1 + for args in [[true, true, true, true, true, true, 2, true, 12], + [true, true, false, false, false, false, 2, true, 12], + [true, true, true, true, false, false, 2, true, 12], + [true, false, true, true, false, false, 2, true, 12]] + args = [x in [0, 1] ? 
Bool(x) : x for x in args] + for param in param_combination + args[8] = param[1] + args[9] = param[2] + if param[3] != 0 + X1 = StateSpaceLearning.create_X(args..., Exogenous_X1, param[3], + Exogenous_forecast1) + else + X1 = StateSpaceLearning.create_X(args..., Exogenous_X1, param[3]) + end + X2 = StateSpaceLearning.create_X(args..., Exogenous_X2, param[3]) + @test size(X1) == size_vec1[counter] + @test size(X2) == size_vec2[counter] + counter += 1 + end + end +end + +@testset "Function: get_components" begin + Exogenous_X1 = rand(10, 3) + Exogenous_X2 = zeros(10, 0) + + Basic_Structural = StateSpaceLearning.StructuralModel(rand(10); level=true, + stochastic_level=true, trend=true, + stochastic_trend=true, + seasonal=true, + stochastic_seasonal=true, + freq_seasonal=2, outlier=true, + ζ_ω_threshold=0, + Exogenous_X=Exogenous_X1) + Local_Level = StateSpaceLearning.StructuralModel(rand(10); level=true, + stochastic_level=true, trend=false, + stochastic_trend=false, seasonal=false, + stochastic_seasonal=false, + freq_seasonal=2, outlier=true, + ζ_ω_threshold=0, + Exogenous_X=Exogenous_X1) + Local_Linear_Trend1 = StateSpaceLearning.StructuralModel(rand(10); level=true, + stochastic_level=true, + trend=true, + stochastic_trend=true, + seasonal=false, + stochastic_seasonal=false, + freq_seasonal=2, outlier=false, + ζ_ω_threshold=0, + Exogenous_X=Exogenous_X1) + Local_Linear_Trend2 = StateSpaceLearning.StructuralModel(rand(10); level=true, + stochastic_level=true, + trend=true, + stochastic_trend=true, + seasonal=false, + stochastic_seasonal=false, + freq_seasonal=2, outlier=false, + ζ_ω_threshold=0, + Exogenous_X=Exogenous_X2) + + models = [Basic_Structural, + Local_Level, + Local_Linear_Trend1, + Local_Linear_Trend2] + + empty_keys_vec = [[], + ["ν1", "ζ", "γ₁", "ω"], + ["γ₁", "ω", "o"], + ["γ₁", "ω", "o", "Exogenous_X"]] + + exogs = [Exogenous_X1, + Exogenous_X1, + Exogenous_X1, + Exogenous_X2] + + for idx in eachindex(models) + model = models[idx] + + components_indexes = StateSpaceLearning.get_components_indexes(model) + + for key in keys(components_indexes) + @test (key in empty_keys_vec[idx]) ? isempty(components_indexes[key]) : true + @test key == "Exogenous_X" ? 
+ length(components_indexes[key]) == size(exogs[idx], 2) : true + end + end +end + +@testset "Function: get_variances" begin + Exogenous_X2 = zeros(10, 0) + + Basic_Structural = StateSpaceLearning.StructuralModel(rand(10); level=true, + stochastic_level=true, trend=true, + stochastic_trend=true, + seasonal=true, + stochastic_seasonal=true, + freq_seasonal=2, outlier=true, + ζ_ω_threshold=0, + Exogenous_X=Exogenous_X2) + Local_Level = StateSpaceLearning.StructuralModel(rand(10); level=true, + stochastic_level=true, trend=false, + stochastic_trend=false, seasonal=false, + stochastic_seasonal=false, + freq_seasonal=2, outlier=true, + ζ_ω_threshold=0, + Exogenous_X=Exogenous_X2) + Local_Linear_Trend = StateSpaceLearning.StructuralModel(rand(10); level=true, + stochastic_level=true, + trend=true, + stochastic_trend=true, + seasonal=false, + stochastic_seasonal=false, + freq_seasonal=2, outlier=false, + ζ_ω_threshold=0, + Exogenous_X=Exogenous_X2) + + models = [Basic_Structural, + Local_Level, + Local_Linear_Trend] + + params_vec = [["ξ", "ζ", "ω", "ε"], + ["ξ", "ε"], + ["ξ", "ζ", "ε"]] + + for idx in eachindex(models) + model = models[idx] + components_indexes = StateSpaceLearning.get_components_indexes(model) + variances = StateSpaceLearning.get_variances(model, rand(100), rand(39), + components_indexes) + @test all([key in keys(variances) for key in params_vec[idx]]) + end +end + +@testset "Function: get_model_innovations" begin + model1 = StateSpaceLearning.StructuralModel(rand(10); level=true, stochastic_level=true, + trend=true, stochastic_trend=true, + seasonal=true, stochastic_seasonal=true, + freq_seasonal=2, outlier=true, + ζ_ω_threshold=0, Exogenous_X=zeros(10, 0)) + model2 = StateSpaceLearning.StructuralModel(rand(10); level=true, + stochastic_level=false, trend=true, + stochastic_trend=true, seasonal=true, + stochastic_seasonal=true, freq_seasonal=2, + outlier=true, ζ_ω_threshold=0, + Exogenous_X=zeros(10, 0)) + model3 = StateSpaceLearning.StructuralModel(rand(10); level=true, stochastic_level=true, + trend=true, stochastic_trend=false, + seasonal=true, stochastic_seasonal=true, + freq_seasonal=2, outlier=true, + ζ_ω_threshold=0, Exogenous_X=zeros(10, 0)) + model4 = StateSpaceLearning.StructuralModel(rand(10); level=true, stochastic_level=true, + trend=true, stochastic_trend=true, + seasonal=true, stochastic_seasonal=false, + freq_seasonal=2, outlier=true, + ζ_ω_threshold=0, Exogenous_X=zeros(10, 0)) + + models = [model1, model2, model3, model4] + + keys_vec = [["ξ", "ζ", "ω"], + ["ζ", "ω"], + ["ξ", "ω"], + ["ξ", "ζ"]] + + for idx in eachindex(models) + model = models[idx] + model_innovations = StateSpaceLearning.get_model_innovations(model) + @test model_innovations == keys_vec[idx] + end +end + +@testset "Function: get_innovation_simulation_X" begin + innovation1 = "ξ" + innovation2 = "ζ" + innovation3 = "ω" + + model = StateSpaceLearning.StructuralModel(rand(3); level=true, stochastic_level=true, + trend=true, stochastic_trend=true, + seasonal=true, stochastic_seasonal=true, + freq_seasonal=2, outlier=true, + ζ_ω_threshold=0, Exogenous_X=zeros(10, 0)) + steps_ahead = 2 + + X1 = StateSpaceLearning.get_innovation_simulation_X(model, innovation1, steps_ahead) + @assert X1 == [0.0 0.0 0.0 0.0; + 1.0 0.0 0.0 0.0; + 1.0 1.0 0.0 0.0; + 1.0 1.0 1.0 0.0; + 1.0 1.0 1.0 1.0; + 1.0 1.0 1.0 1.0] + + X2 = StateSpaceLearning.get_innovation_simulation_X(model, innovation2, steps_ahead) + @assert X2 == [0.0 0.0 0.0; + 0.0 0.0 0.0; + 1.0 0.0 0.0; + 2.0 1.0 0.0; + 3.0 2.0 1.0; + 4.0 3.0 2.0] + + X3 
= StateSpaceLearning.get_innovation_simulation_X(model, innovation3, steps_ahead) + @assert X3 == [0.0 0.0 0.0 0.0; + 0.0 0.0 0.0 0.0; + -1.0 1.0 0.0 0.0; + 0.0 -1.0 1.0 0.0; + -1.0 1.0 -1.0 1.0; + 0.0 -1.0 1.0 -1.0] +end diff --git a/test/runtests.jl b/test/runtests.jl index f945574..4113e16 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,7 +1,7 @@ using StateSpaceLearning, Test, Random, LinearAlgebra, GLMNet, Statistics -include("models/default_model.jl") +include("models/structural_model.jl") include("information_criteria.jl") -include("estimation_procedure/default_estimation_procedure.jl") +include("estimation_procedure.jl") include("utils.jl") -include("StateSpaceLearning.jl") +include("fit_forecast.jl") diff --git a/test/utils.jl b/test/utils.jl index 7a23532..bcb3c51 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -2,22 +2,50 @@ Exogenous_X1 = rand(10, 3) Exogenous_X2 = zeros(10, 0) - Basic_Structural = Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) - Local_Level = Dict("level"=> true, "stochastic_level" => true, "trend" => false, "stochastic_trend" => false, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) - Local_Linear_Trend = Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) - parameter_combination = [ - [Basic_Structural, true, Exogenous_X1], - [Local_Level, true, Exogenous_X1], - [Local_Linear_Trend, true, Exogenous_X1], - [Basic_Structural, false, Exogenous_X1], - [Basic_Structural, true, Exogenous_X2], - ] - - for param in parameter_combination - param[1]["outlier"] = param[2] - X = StateSpaceLearning.create_X(param[1], param[3]) - - components_indexes = StateSpaceLearning.get_components_indexes(param[3], param[1]) + Basic_Structural = StateSpaceLearning.StructuralModel(rand(10); level=true, + stochastic_level=true, trend=true, + stochastic_trend=true, + seasonal=true, + stochastic_seasonal=true, + freq_seasonal=2, outlier=true, + ζ_ω_threshold=0, + Exogenous_X=Exogenous_X1) + Local_Level = StateSpaceLearning.StructuralModel(rand(10); level=true, + stochastic_level=true, trend=false, + stochastic_trend=false, seasonal=false, + stochastic_seasonal=false, + freq_seasonal=2, outlier=true, + ζ_ω_threshold=0, + Exogenous_X=Exogenous_X1) + Local_Linear_Trend1 = StateSpaceLearning.StructuralModel(rand(10); level=true, + stochastic_level=true, + trend=true, + stochastic_trend=true, + seasonal=false, + stochastic_seasonal=false, + freq_seasonal=2, outlier=false, + ζ_ω_threshold=0, + Exogenous_X=Exogenous_X1) + Local_Linear_Trend2 = StateSpaceLearning.StructuralModel(rand(10); level=true, + stochastic_level=true, + trend=true, + stochastic_trend=true, + seasonal=false, + stochastic_seasonal=false, + freq_seasonal=2, outlier=false, + ζ_ω_threshold=0, + Exogenous_X=Exogenous_X2) + + models = [Basic_Structural, + Local_Level, + Local_Linear_Trend1, + Local_Linear_Trend2] + + for idx in eachindex(models) + model = models[idx] + X = model.X + + components_indexes = StateSpaceLearning.get_components_indexes(model) coefs = rand(size(X, 2)) components = StateSpaceLearning.build_components(X, coefs, components_indexes) @@ -28,25 +56,25 @@ @test key == "Exogenous_X" ? 
"Selected" in keys(components[key]) : true end end - end @testset "Function: get_fit_and_residuals" begin - coefs = rand(10) X = rand(30, 10) T = 30 valid_indexes1 = setdiff(collect(1:30), [11, 12]) estimation_ε1 = rand(length(valid_indexes1)) - ε1, fitted1 = StateSpaceLearning.get_fit_and_residuals(estimation_ε1, coefs, X, valid_indexes1, T) + ε1, fitted1 = StateSpaceLearning.get_fit_and_residuals(estimation_ε1, coefs, X, + valid_indexes1, T) @test all(isnan.(ε1[11:12])) @test !all(isnan.(ε1[valid_indexes1])) @test !all(isnan.(fitted1)) valid_indexes2 = collect(1:30) estimation_ε2 = rand(length(valid_indexes2)) - ε2, fitted2 = StateSpaceLearning.get_fit_and_residuals(estimation_ε2, coefs, X, valid_indexes2, T) + ε2, fitted2 = StateSpaceLearning.get_fit_and_residuals(estimation_ε2, coefs, X, + valid_indexes2, T) @test !all(isnan.(ε2[valid_indexes2])) @test !all(isnan.(fitted2)) end @@ -57,4 +85,18 @@ end X = [ones(10) rand(10, 2)] @test StateSpaceLearning.has_intercept(X) -end \ No newline at end of file +end + +@testset "Function: fill_innovation_coefs" begin + model = StateSpaceLearning.StructuralModel(rand(100)) + StateSpaceLearning.fit!(model) + components = ["ξ", "ζ", "ω"] + + inov_comp1 = StateSpaceLearning.fill_innovation_coefs(model, components[1]) + inov_comp2 = StateSpaceLearning.fill_innovation_coefs(model, components[2]) + inov_comp3 = StateSpaceLearning.fill_innovation_coefs(model, components[3]) + + @test length(inov_comp1) == 100 + @test length(inov_comp2) == 100 + @test length(inov_comp3) == 100 +end