diff --git a/README.md b/README.md index 6926778..1f6c138 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,107 @@ # StateSpaceLearning -[![ci](https://github.com/LAMPSPUC/StateSpaceLearning/actions/workflows/ci.yml/badge.svg)](https://github.com/LAMPSPUC/StateSpaceLearning/actions/workflows/ci.yml) +| **Build Status** | **Coverage** | +|:-----------------:|:-----------------:| +| [![ci](https://github.com/LAMPSPUC/StateSpaceLearning/actions/workflows/ci.yml/badge.svg)](https://github.com/LAMPSPUC/StateSpaceLearning/actions/workflows/ci.yml) | [![codecov](https://codecov.io/gh/LAMPSPUC/StateSpaceLearning/graph/badge.svg?token=VDpuXvPSI2)](https://codecov.io/gh/LAMPSPUC/StateSpaceLearning) | -[![codecov](https://codecov.io/gh/LAMPSPUC/StateSpaceLearning/graph/badge.svg?token=VDpuXvPSI2)](https://codecov.io/gh/LAMPSPUC/StateSpaceLearning) -StateSpaceLearning.jl is a package for modeling and forecasting time series in a high-dimension regression framework. \ No newline at end of file +StateSpaceLearning.jl is a package for modeling and forecasting time series in a high-dimension regression framework. + +## Quickstart + +```julia +using StateSpaceLearning + +y = randn(100) + +#Fit Model +output = StateSpaceLearning.fit_model(y) + +#Main output options +model_type = output.model_type # State Space Equivalent model utilized in the estimation (default = Basic Structural). +X = output.X # High Dimension Regression utilized in the estimation. +coefs = output.coefs # High Dimension Regression coefficients estimated in the estimation. +ϵ = output.ϵ # Residuals of the model. +fitted = output.fitted # Fit in Sample of the model. +components = output.components # Dictionary containing information about each component of the model, each component has the keys: "Values" (The value of the component in each timestamp) , "Coefs" (The coefficients estimated for each element of the component) and "Indexes" (The indexes of the elements of the component in the high dimension regression "X"). +residuals_variances = output.residuals_variances # Dictionary containing the estimated variances for the innovations components (that is the information that can be utilized to initialize the state space model). +s = ouotput.s # The seasonal frequency utilized in the model (default = 12). +T = output.T # The length of the original time series. +outlier = output.outlier # Boolean indicating the presence of outlier component (default = false). +valid_indexes = output.valid_indexes # Vector containing valid indexes of the time series (non valid indexes represent NaN values in the time series). +stabilize_ζ = output.stabilize_ζ # Stabilize_ζ parameter (default = 0). A non 0 value for this parameter might be important in terms of forecast for some time series to lead to more stable predictions (we recommend stabilize_ζ = 11 for monthly series). + +#Forecast +prediction = StateSpaceLearning.forecast(output, 12) #Gets a 12 steps ahead prediction + +``` + +## Features + +Current features include: +* Estimation +* Components decomposition +* Forecasting +* Completion of missing values +* Predefined models, including: + * Basic Structural" + * Local Linear Trend + * Local Level + +## Quick Examples + +### Fitting and forecasting +Quick example of fit and forecast for the air passengers time-series. + +```julia +using CSV +using DataFrames +using Plots +using StateSpaceModels + +airp = CSV.File(StateSpaceModels.AIR_PASSENGERS) |> DataFrame +log_air_passengers = log.(airp.passengers) +steps_ahead = 30 + +output = StateSpaceLearning.fit_model(log_air_passengers) +prediction_raw = StateSpaceLearning.forecast(output, steps_ahead) +prediction = exp.(prediction_raw) + +plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom) +plot!(vcat(ones(output.T).*NaN, prediction), lab = "Forcast", w=2, color = "blue") +``` +![quick_example_airp](./docs/assets/quick_example_airp.png) + +### Completion of missing values +Quick example of completion of missing values for the air passengers time-series (artificial NaN values are added to the original time-series). + +```julia +using CSV +using DataFrames +using Plots +using StateSpaceModels + +airp = CSV.File(StateSpaceModels.AIR_PASSENGERS) |> DataFrame +airpassengers = (airp.passengers).*1.0 +log_air_passengers = log.(airpassengers) +steps_ahead = 30 + +log_air_passengers[60:72] .= NaN + +output = StateSpaceLearning.fit_model(log_air_passengers) + +fitted_completed_missing_values = ones(144).*NaN; fitted_completed_missing_values[60:72] = exp.(output.fitted[60:72]) +real_removed_valued = ones(144).*NaN; real_removed_valued[60:72] = deepcopy(airpassengers[60:72]) +airpassengers[60:72] .= NaN + +plot(airpassengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom) +plot!(real_removed_valued, lab = "Real Removed Values", w=2, color = "red") +plot!(fitted_completed_missing_values, lab = "Fit in Sample completed values", w=2, color = "blue") + +``` +![quick_example_completion_airp](./docs/assets/quick_example_completion_airp.png) + +## Contributing + +* PRs such as adding new models and fixing bugs are very welcome! +* For nontrivial changes, you'll probably want to first discuss the changes via issue. \ No newline at end of file diff --git a/docs/assets/quick_example_airp.PNG b/docs/assets/quick_example_airp.PNG new file mode 100644 index 0000000..960b350 Binary files /dev/null and b/docs/assets/quick_example_airp.PNG differ diff --git a/docs/assets/quick_example_completion_airp.PNG b/docs/assets/quick_example_completion_airp.PNG new file mode 100644 index 0000000..c4bbbea Binary files /dev/null and b/docs/assets/quick_example_completion_airp.PNG differ diff --git a/src/StateSpaceLearning.jl b/src/StateSpaceLearning.jl index 59561de..24586a1 100644 --- a/src/StateSpaceLearning.jl +++ b/src/StateSpaceLearning.jl @@ -16,6 +16,31 @@ include("utils.jl") export fit_model, forecast +""" + fit_model(y::Vector{Fl}; model_type::String="Basic Structural", Exogenous_X::Union{Matrix{Fl}, Missing}=missing, + estimation_procedure::String="AdaLasso", s::Int64=12, outlier::Bool=false, stabilize_ζ::Int64=0, + α::Float64=0.1, hyperparameter_selection::String="aic", adalasso_coef::Float64=0.1, + select_exogenous::Bool=true)::Output where Fl + + Fits the StateSpaceLearning model using specified parameters and estimation procedures. + + # Arguments + - `y::Vector{Fl}`: Vector of data. + - `model_type::String`: Type of model (default: "Basic Structural"). + - `Exogenous_X::Union{Matrix{Fl}, Missing}`: Exogenous variables matrix (default: missing). + - `estimation_procedure::String`: Estimation procedure (default: "AdaLasso"). + - `s::Int64`: Seasonal period (default: 12). + - `outlier::Bool`: Flag for considering outlier component (default: false). + - `stabilize_ζ::Int64`: Stabilize_ζ parameter (default: 0). + - `α::Float64`: Elastic net control factor between ridge (α=0) and lasso (α=1) (default: 0.1). + - `hyperparameter_selection::String`: Information criteria method for hyperparameter selection (default: "aic"). + - `adalasso_coef::Float64`: AdaLasso adjustment coefficient (default: 0.1). + - `select_exogenous::Bool`: Flag to select exogenous variables. When false the penalty factor for these variables will be set to 0 (default: true). + + # Returns + - `Output`: Output object containing model information, coefficients, residuals, etc. + +""" function fit_model(y::Vector{Fl}; model_type::String="Basic Structural", Exogenous_X::Union{Matrix{Fl}, Missing}=missing, estimation_procedure::String="AdaLasso", s::Int64=12, outlier::Bool=false, stabilize_ζ::Int64=0, α::Float64=0.1, hyperparameter_selection::String="aic", adalasso_coef::Float64=0.1, select_exogenous::Bool=true)::Output where Fl @@ -25,7 +50,7 @@ function fit_model(y::Vector{Fl}; model_type::String="Basic Structural", Exogeno @assert (model_type in AVAILABLE_MODELS) "Unavailable Model" @assert (estimation_procedure in AVAILABLE_ESTIMATION_PROCEDURES) "Unavailable estimation procedure" @assert (hyperparameter_selection in AVAILABLE_HYPERPARAMETER_SELECTION) "Unavailable hyperparameter selection method" - @assert 0 < α <= 1 "α must be in (0, 1], Lasso.jl cannot handle α = 0" + @assert 0 <= α <= 1 "α must be in (0, 1], Lasso.jl cannot handle α = 0" valid_indexes = findall(i -> !isnan(i), y) estimation_y = y[valid_indexes] @@ -48,6 +73,20 @@ function fit_model(y::Vector{Fl}; model_type::String="Basic Structural", Exogeno return Output(model_type, X, coefs, ϵ, fitted, components, residuals_variances, s, T, outlier, valid_indexes, stabilize_ζ) end +""" + forecast(output::Output, steps_ahead::Int64; Exogenous_Forecast::Union{Matrix{Fl}, Missing}=missing)::Vector{Float64} where Fl + + Returns the forecast for a given number of steps ahead using the provided StateSpaceLearning output and exogenous forecast data. + + # Arguments + - `output::Output`: Output object obtained from model fitting. + - `steps_ahead::Int64`: Number of steps ahead for forecasting. + - `Exogenous_Forecast::Union{Matrix{Fl}, Missing}`: Exogenous variables forecast (default: missing). + + # Returns + - `Vector{Float64}`: Vector containing forecasted values. + +""" function forecast(output::Output, steps_ahead::Int64; Exogenous_Forecast::Union{Matrix{Fl}, Missing}=missing)::Vector{Float64} where Fl @assert steps_ahead > 0 "steps_ahead must be a positive integer" Exogenous_Forecast = ismissing(Exogenous_Forecast) ? zeros(steps_ahead, 0) : Exogenous_Forecast diff --git a/src/estimation_procedure/adalasso.jl b/src/estimation_procedure/adalasso.jl index 73fec27..0793468 100644 --- a/src/estimation_procedure/adalasso.jl +++ b/src/estimation_procedure/adalasso.jl @@ -1,3 +1,24 @@ +""" + fit_adalasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64, + hyperparameter_selection::String, + components_indexes::Dict{String, Vector{Int64}}, + adalasso_coef::Float64, select_exogenous::Bool)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} + + Fits an Adaptive Lasso (AdaLasso) regression model to the provided data and returns coefficients and residuals. + + # Arguments + - `Estimation_X::Matrix{Tl}`: Matrix of predictors for estimation. + - `estimation_y::Vector{Fl}`: Vector of response values for estimation. + - `α::Float64`: Elastic net control factor between ridge (α=0) and lasso (α=1) (default: 0.1). + - `hyperparameter_selection::String`: Information Criteria method for hyperparameter selection (default: aic). + - `components_indexes::Dict{String, Vector{Int64}}`: Dictionary containing indexes for different components. + - `adalasso_coef::Float64`: AdaLasso adjustment coefficient (default: 0.1). + - `select_exogenous::Bool`: Flag for selecting exogenous variables. When false the penalty factor for these variables will be set to 0. + + # Returns + - `Tuple{Vector{Float64}, Vector{Float64}}`: Tuple containing coefficients and residuals of the fitted AdaLasso model. + +""" function fit_adalasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64, hyperparameter_selection::String, components_indexes::Dict{String, Vector{Int64}}, @@ -12,7 +33,7 @@ function fit_adalasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Fl if key != "initial_states" && key != "μ₁" component = components_indexes[key] if key != "Exogenous_X" && key != "o" && !(key in ["ν₁", "γ₁"]) - κ = count(i -> i == 0, coefs[component]) < 1 ? 0 : std(coefs[component]) + κ = count(i -> i != 0, coefs[component]) < 1 ? 0 : std(coefs[component]) penalty_factor[component .- 1] .= (1 / (κ + adalasso_coef)) else penalty_factor[component .- 1] = (1 ./ (abs.(coefs[component]) .+ adalasso_coef)) diff --git a/src/estimation_procedure/estimation_utils.jl b/src/estimation_procedure/estimation_utils.jl index 86cbb9d..e8c3531 100644 --- a/src/estimation_procedure/estimation_utils.jl +++ b/src/estimation_procedure/estimation_utils.jl @@ -1,3 +1,15 @@ +""" + get_dummy_indexes(Exogenous_X::Matrix{Fl}) where {Fl} + + Identifies and returns the indexes of columns in the exogenous matrix that contain dummy variables. + + # Arguments + - `Exogenous_X::Matrix{Fl}`: Exogenous variables matrix. + + # Returns + - `Vector{Int}`: Vector containing the indexes of columns with dummy variables. + +""" function get_dummy_indexes(Exogenous_X::Matrix{Fl}) where{Fl} T, p = size(Exogenous_X) @@ -12,6 +24,19 @@ function get_dummy_indexes(Exogenous_X::Matrix{Fl}) where{Fl} return dummy_indexes end +""" + get_outlier_duplicate_columns(Estimation_X::Matrix{Tl}, components_indexes::Dict{String, Vector{Int64}}) where{Tl} + + Identifies and returns the indexes of outlier columns that are duplicates of dummy variables in the exogenous matrix. + + # Arguments + - `Estimation_X::Matrix{Tl}`: Matrix used for estimation. + - `components_indexes::Dict{String, Vector{Int64}}`: Dictionary containing indexes for different components. + + # Returns + - `Vector{Int}`: Vector containing the indexes of outlier columns that are duplicates of dummy variables in the exogenous matrix. + +""" function get_outlier_duplicate_columns(Estimation_X::Matrix{Tl}, components_indexes::Dict{String, Vector{Int64}}) where{Tl} o_indexes = components_indexes["o"] exogenous_indexes = components_indexes["Exogenous_X"] @@ -21,6 +46,27 @@ function get_outlier_duplicate_columns(Estimation_X::Matrix{Tl}, components_inde return o_indexes[dummy_indexes] .- 1 end +""" + fit_estimation_procedure(estimation_procedure::String, Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64, + hyperparameter_selection::String, components_indexes::Dict{String, Vector{Int64}}, + adalasso_coef::Float64, select_exogenous::Bool)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} + + Fits the specified estimation procedure (currently Lasso or AdaLasso) to the provided data and returns coefficients and residuals. + + # Arguments + - `estimation_procedure::String`: The chosen estimation procedure (either "Lasso" or "AdaLasso"). + - `Estimation_X::Matrix{Tl}`: Matrix of predictors for estimation. + - `estimation_y::Vector{Fl}`: Vector of response values for estimation. + - `α::Float64`: Elastic net control factor between ridge (α=0) and lasso (α=1) (default: 0.1). + - `hyperparameter_selection::String`: Information Criteria method for hyperparameter selection (default: aic). + - `components_indexes::Dict{String, Vector{Int64}}`: Dictionary containing indexes for different components. + - `adalasso_coef::Float64`: AdaLasso adjustment coefficient (default: 0.1). + - `select_exogenous::Bool`: Flag for selecting exogenous variables. When false the penalty factor for these variables will be set to 0. + + # Returns + - `Tuple{Vector{Float64}, Vector{Float64}}`: Tuple containing coefficients and residuals of the fitted estimation procedure. + +""" function fit_estimation_procedure(estimation_procedure::String, Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64, hyperparameter_selection::String, components_indexes::Dict{String, Vector{Int64}}, adalasso_coef::Float64, select_exogenous::Bool)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} diff --git a/src/estimation_procedure/information_criteria.jl b/src/estimation_procedure/information_criteria.jl index f8be846..b1b4f43 100644 --- a/src/estimation_procedure/information_criteria.jl +++ b/src/estimation_procedure/information_criteria.jl @@ -1,3 +1,20 @@ +""" + get_information(T::Int64, K::Int64, ϵ::Vector{Float64}; + hyperparameter_selection::String = "bic", p::Int64 = 0)::Float64 + + Calculates information criterion value based on the provided parameters and residuals. + + # Arguments + - `T::Int64`: Number of observations. + - `K::Int64`: Number of selected predictors. + - `ϵ::Vector{Float64}`: Vector of residuals. + - `hyperparameter_selection::String`: Method for hyperparameter selection (default: "bic"). + - `p::Int64`: Number of total predictors (default: 0). + + # Returns + - `Float64`: Information criterion value. + +""" function get_information(T::Int64, K::Int64, ϵ::Vector{Float64}; hyperparameter_selection::String = "bic", p::Int64 = 0)::Float64 if hyperparameter_selection == "bic" return T*log(var(ϵ)) + K*log(T) diff --git a/src/estimation_procedure/lasso.jl b/src/estimation_procedure/lasso.jl index 8863d43..16852a7 100644 --- a/src/estimation_procedure/lasso.jl +++ b/src/estimation_procedure/lasso.jl @@ -1,3 +1,20 @@ +""" + get_path_information_criteria(model::GLMNetPath, Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, + hyperparameter_selection::String; intercept::Bool = true)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} + + Calculates the information criteria along the regularization path of a GLMNet model and returns coefficients and residuals of the best model based on the selected information criteria. + + # Arguments + - `model::GLMNetPath`: Fitted GLMNetPath model object. + - `Estimation_X::Matrix{Tl}`: Matrix of predictors for estimation. + - `estimation_y::Vector{Fl}`: Vector of response values for estimation. + - `hyperparameter_selection::String`: Information Criteria method for hyperparameter selection. + - `intercept::Bool`: Flag for intercept inclusion in the model (default: true). + + # Returns + - `Tuple{Vector{Float64}, Vector{Float64}}`: Tuple containing coefficients and residuals of the best model. + +""" function get_path_information_criteria(model::GLMNetPath, Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, hyperparameter_selection::String; intercept::Bool = true)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} path_size = length(model.lambda) T, p = size(Estimation_X) @@ -18,11 +35,52 @@ function get_path_information_criteria(model::GLMNetPath, Estimation_X::Matrix{T return coefs, ϵ end +""" + fit_glmnet(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64; + hyperparameter_selection::String = "aic", + penalty_factor::Vector{Float64}=ones(size(Estimation_X,2) - 1), + intercept::Bool = intercept)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} + + Fits a GLMNet model to the provided data and returns coefficients and residuals based on selected criteria. + + # Arguments + - `Estimation_X::Matrix{Tl}`: Matrix of predictors for estimation. + - `estimation_y::Vector{Fl}`: Vector of response values for estimation. + - `α::Float64`: Elastic net control factor between ridge (α=0) and lasso (α=1) (default: 0.1). + - `hyperparameter_selection::String`: Information Criteria method for hyperparameter selection (default: aic). + - `penalty_factor::Vector{Float64}`: Penalty factors for each predictor (default: ones(size(Estimation_X, 2) - 1)). + - `intercept::Bool`: Flag for intercept inclusion in the model (default: true). + + # Returns + - `Tuple{Vector{Float64}, Vector{Float64}}`: Tuple containing coefficients and residuals of the best model. + +""" function fit_glmnet(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64; hyperparameter_selection::String = "aic", penalty_factor::Vector{Float64}=ones(size(Estimation_X,2) - 1), intercept::Bool = intercept)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} model = glmnet(Estimation_X, estimation_y, alpha = α, penalty_factor = penalty_factor, intercept = intercept, dfmax=size(Estimation_X, 2), lambda_min_ratio=0.001) return get_path_information_criteria(model, Estimation_X, estimation_y, hyperparameter_selection; intercept = intercept) end +""" + fit_lasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64, hyperparameter_selection::String, + select_exogenous::Bool, components_indexes::Dict{String, Vector{Int64}}; + penalty_factor::Vector{Float64}=ones(size(Estimation_X,2) - 1), intercept::Bool = true)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} + + Fits a Lasso regression model to the provided data and returns coefficients and residuals based on selected criteria. + + # Arguments + - `Estimation_X::Matrix{Tl}`: Matrix of predictors for estimation. + - `estimation_y::Vector{Fl}`: Vector of response values for estimation. + - `α::Float64`: Elastic net control factor between ridge (α=0) and lasso (α=1) (default: 0.1). + - `hyperparameter_selection::String`: Information Criteria method for hyperparameter selection (default: aic). + - `select_exogenous::Bool`: Flag for selecting exogenous variables. When false the penalty factor for these variables will be set to 0. + - `components_indexes::Dict{String, Vector{Int64}}`: Dictionary containing indexes for different components. + - `penalty_factor::Vector{Float64}`: Penalty factors for each predictor (default: ones(size(Estimation_X, 2) - 1)). + - `intercept::Bool`: Flag for intercept inclusion in the model (default: true). + + # Returns + - `Tuple{Vector{Float64}, Vector{Float64}}`: Tuple containing coefficients and residuals of the fitted Lasso model. + +""" function fit_lasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64, hyperparameter_selection::String, select_exogenous::Bool, components_indexes::Dict{String, Vector{Int64}}; penalty_factor::Vector{Float64}=ones(size(Estimation_X,2) - 1), intercept::Bool = true)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} outlier_duplicate_columns = get_outlier_duplicate_columns(Estimation_X, components_indexes) diff --git a/src/model_utils.jl b/src/model_utils.jl index 959e3f2..a7ef5f5 100644 --- a/src/model_utils.jl +++ b/src/model_utils.jl @@ -1,11 +1,74 @@ +""" + ξ_size(T::Int64)::Int64 + + Calculates the size of ξ innovation matrix based on the input T. + + # Arguments + - `T::Int64`: Length of the original time series. + + # Returns + - `Int64`: Size of ξ calculated from T. + +""" ξ_size(T::Int64)::Int64 = T-2 +""" +ζ_size(T::Int64, stabilize_ζ::Int64)::Int64 + + Calculates the size of ζ innovation matrix based on the input T. + + # Arguments + - `T::Int64`: Length of the original time series. + - `stabilize_ζ::Int64`: Stabilize parameter ζ. + + # Returns + - `Int64`: Size of ζ calculated from T. + +""" ζ_size(T::Int64, stabilize_ζ::Int64)::Int64 = T-stabilize_ζ-1 +""" +ω_size(T::Int64, s::Int64)::Int64 + + Calculates the size of ω innovation matrix based on the input T. + + # Arguments + - `T::Int64`: Length of the original time series. + - `s::Int64`: Seasonal period. + + # Returns + - `Int64`: Size of ω calculated from T. + +""" ω_size(T::Int64, s::Int64)::Int64 = T-s +""" +o_size(T::Int64)::Int64 + + Calculates the size of outlier matrix based on the input T. + + # Arguments + - `T::Int64`: Length of the original time series. + + # Returns + - `Int64`: Size of o calculated from T. + +""" o_size(T::Int64)::Int64 = T +""" + create_ξ(T::Int64, steps_ahead::Int64)::Matrix + + Creates a matrix of innovations ξ based on the input sizes, and the desired steps ahead (this is necessary for the forecast function) + + # Arguments + - `T::Int64`: Length of the original time series. + - `steps_ahead::Int64`: Number of steps ahead (for estimation purposes this should be set at 0). + + # Returns + - `Matrix`: Matrix of innovations ξ constructed based on the input sizes. + +""" function create_ξ(T::Int64, steps_ahead::Int64)::Matrix ξ_matrix = Matrix{Float64}(undef, T+steps_ahead, ξ_size(T)) for t in 1:T+steps_ahead @@ -15,6 +78,20 @@ function create_ξ(T::Int64, steps_ahead::Int64)::Matrix return ξ_matrix end +""" +create_ζ(T::Int64, steps_ahead::Int64, stabilize_ζ::Int64)::Matrix + + Creates a matrix of innovations ζ based on the input sizes, and the desired steps ahead (this is necessary for the forecast function). + + # Arguments + - `T::Int64`: Length of the original time series. + - `steps_ahead::Int64`: Number of steps ahead (for estimation purposes this should be set at 0). + - `stabilize_ζ::Int64`: Stabilize parameter ζ. + + # Returns + - `Matrix`: Matrix of innovations ζ constructed based on the input sizes. + +""" function create_ζ(T::Int64, steps_ahead::Int64, stabilize_ζ::Int64)::Matrix ζ_matrix = Matrix{Float64}(undef, T+steps_ahead, ζ_size(T, stabilize_ζ) + stabilize_ζ) for t in 1:T+steps_ahead @@ -23,6 +100,20 @@ function create_ζ(T::Int64, steps_ahead::Int64, stabilize_ζ::Int64)::Matrix return ζ_matrix[:, 1:end-stabilize_ζ] end +""" +create_ω(T::Int64, s::Int64, steps_ahead::Int64)::Matrix + + Creates a matrix of innovations ω based on the input sizes, and the desired steps ahead (this is necessary for the forecast function). + + # Arguments + - `T::Int64`: Length of the original time series. + - `s::Int64`: Seasonal period. + - `steps_ahead::Int64`: Number of steps ahead (for estimation purposes this should be set at 0). + + # Returns + - `Matrix`: Matrix of innovations ω constructed based on the input sizes. + +""" function create_ω(T::Int64, s::Int64, steps_ahead::Int64)::Matrix ω_matrix_size = ω_size(T, s) ω_matrix = Matrix{Float64}(undef, T+steps_ahead, ω_matrix_size) @@ -38,10 +129,38 @@ function create_ω(T::Int64, s::Int64, steps_ahead::Int64)::Matrix return ω_matrix end +""" +create_o_matrix(T::Int64, steps_ahead::Int64)::Matrix + + Creates a matrix of outliers based on the input sizes, and the desired steps ahead (this is necessary for the forecast function). + + # Arguments + - `T::Int64`: Length of the original time series. + - `steps_ahead::Int64`: Number of steps ahead (for estimation purposes this should be set at 0). + + # Returns + - `Matrix`: Matrix of outliers constructed based on the input sizes. + +""" function create_o_matrix(T::Int64, steps_ahead::Int64)::Matrix return vcat(Matrix(1.0 * I, T, T), zeros(steps_ahead, T)) end +""" + create_initial_states_Matrix(T::Int64, s::Int64, steps_ahead::Int64, model_type::String)::Matrix + + Creates an initial states matrix based on the input parameters. + + # Arguments + - `T::Int64`: Length of the original time series. + - `s::Int64`: Seasonal period. + - `steps_ahead::Int64`: Number of steps ahead. + - `model_type::String`: Type of model. + + # Returns + - `Matrix`: Initial states matrix constructed based on the input parameters. + +""" function create_initial_states_Matrix(T::Int64, s::Int64, steps_ahead::Int64, model_type::String)::Matrix μ₀_coefs = ones(T+steps_ahead) ν₀_coefs = collect(1:T+steps_ahead) @@ -61,6 +180,25 @@ function create_initial_states_Matrix(T::Int64, s::Int64, steps_ahead::Int64, mo end +""" + create_X(model_type::String, T::Int64, s::Int64, Exogenous_X::Matrix{Fl}, outlier::Bool, stabilize_ζ::Int64, + steps_ahead::Int64=0, Exogenous_Forecast::Matrix{Fl}=zeros(steps_ahead, size(Exogenous_X, 2))) where Fl + + Creates the StateSpaceLearning matrix X based on the model type and input parameters. + + # Arguments + - `model_type::String`: Type of model. + - `T::Int64`: Length of the original time series. + - `s::Int64`: Seasonal period. + - `Exogenous_X::Matrix{Fl}`: Exogenous variables matrix. + - `outlier::Bool`: Flag for considering outlier component. + - `stabilize_ζ::Int64`: Stabilize parameter for ζ matrix. + - `steps_ahead::Int64`: Number of steps ahead (default: 0). + - `Exogenous_Forecast::Matrix{Fl}`: Exogenous variables forecast matrix (default: zeros). + + # Returns + - `Matrix`: StateSpaceLearning matrix X constructed based on the input parameters. +""" function create_X(model_type::String, T::Int64, s::Int64, Exogenous_X::Matrix{Fl}, outlier::Bool, stabilize_ζ::Int64, steps_ahead::Int64=0, Exogenous_Forecast::Matrix{Fl}=zeros(steps_ahead, size(Exogenous_X, 2))) where Fl @@ -80,6 +218,23 @@ function create_X(model_type::String, T::Int64, s::Int64, Exogenous_X::Matrix{Fl end +""" + get_components_indexes(T::Int64, s::Int64, Exogenous_X::Matrix{Fl}, outlier::Bool, model_type::String, stabilize_ζ::Int64)::Dict where Fl + + Generates indexes dict for different components based on the model type and input parameters. + + # Arguments + - `T::Int64`: Length of the original time series. + - `s::Int64`: Seasonal period. + - `Exogenous_X::Matrix{Fl}`: Exogenous variables matrix. + - `outlier::Bool`: Flag for considering outlier component. + - `model_type::String`: Type of model. + - `stabilize_ζ::Int64`: Stabilize parameter for ζ matrix. + + # Returns + - `Dict`: Dictionary containing the corresponding indexes for each component of the model. + +""" function get_components_indexes(T::Int64, s::Int64, Exogenous_X::Matrix{Fl}, outlier::Bool, model_type::String, stabilize_ζ::Int64)::Dict where Fl μ₁_indexes = [1] ν₁_indexes = model_type in ["Local Linear Trend", "Basic Structural"] ? [2] : Int64[] @@ -110,6 +265,20 @@ function get_components_indexes(T::Int64, s::Int64, Exogenous_X::Matrix{Fl}, out "Exogenous_X" => exogenous_indexes, "initial_states" => initial_states_indexes) end +""" + get_variances(ϵ::Vector{Fl}, coefs::Vector{Fl}, components_indexes::Dict{String, Vector{Int64}})::Dict where Fl + + Calculates variances for each innovation component based on input data. + + # Arguments + - `ϵ::Vector{Fl}`: Vector of residuals. + - `coefs::Vector{Fl}`: Vector of coefficients. + - `components_indexes::Dict{String, Vector{Int64}}`: Dictionary containing indexes for different components. + + # Returns + - `Dict`: Dictionary containing variances for each innovation component. + +""" function get_variances(ϵ::Vector{Fl}, coefs::Vector{Fl}, components_indexes::Dict{String, Vector{Int64}})::Dict where Fl variances = Dict() @@ -120,6 +289,20 @@ function get_variances(ϵ::Vector{Fl}, coefs::Vector{Fl}, components_indexes::Di return variances end +""" + forecast_model(output::Output, steps_ahead::Int64, Exogenous_Forecast::Matrix{Fl})::Vector{Float64} where Fl + + Returns the forecast for a given number of steps ahead using the provided StateSpaceLearning output and exogenous forecast data. + + # Arguments + - `output::Output`: Output object obtained from model fitting. + - `steps_ahead::Int64`: Number of steps ahead for forecasting. + - `Exogenous_Forecast::Matrix{Fl}`: Exogenous forecast matrix. + + # Returns + - `Vector{Float64}`: Vector containing forecasted values. + +""" function forecast_model(output::Output, steps_ahead::Int64, Exogenous_Forecast::Matrix{Fl})::Vector{Float64} where Fl @assert output.model_type in AVAILABLE_MODELS "Unavailable Model" Exogenous_X = output.X[:, output.components["Exogenous_X"]["Indexes"]] diff --git a/src/structs.jl b/src/structs.jl index 2d6755f..1f797c5 100644 --- a/src/structs.jl +++ b/src/structs.jl @@ -1,3 +1,23 @@ +""" + mutable struct Output + + A mutable struct to store various components and results of a model estimation. + + # Fields + - `model_type::String`: Type of the model. + - `X::Matrix`: StateSpaceLearning Matrix data used in the model. + - `coefs::Vector`: Coefficients obtained from the model. + - `ϵ::Vector`: Residuals of the model. + - `fitted::Vector`: Fitted values from the model. + - `components::Dict`: Dictionary containing different components. + - `residuals_variances::Dict`: Dictionary storing variances of residuals for different components. + - `s::Int64`: Integer representing a parameter 's'. + - `T::Int64`: Integer representing a parameter 'T'. + - `outlier::Bool`: Boolean indicating the presence of outlier component. + - `valid_indexes::Vector{Int64}`: Vector containing valid indexes of the time series. + - `stabilize_ζ::Int64`: Stabilize_ζ parameter. + +""" mutable struct Output model_type::String X::Matrix diff --git a/src/utils.jl b/src/utils.jl index def4369..a6e2609 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,3 +1,20 @@ +""" + build_components(X::Matrix{Tl}, coefs::Vector{Float64}, components_indexes::Dict{String, Vector{Int64}}) -> Dict where Tl + + Constructs components dict containing values, indexes and coefficients for each component. + + # Arguments + - X::Matrix{Tl}: Input matrix. + - coefs::Vector{Float64}: Coefficients. + - components_indexes::Dict{String, Vector{Int64}}: Dictionary mapping component names to their indexes. + + # Returns + - components::Dict: Dictionary containing components, each represented by a dictionary with keys: + - "Coefs": Coefficients for the component. + - "Indexes": Indexes associated with the component. + - "Values": Values computed from `X` and component coefficients. + +""" function build_components(X::Matrix{Tl}, coefs::Vector{Float64}, components_indexes::Dict{String, Vector{Int64}})::Dict where Tl components = Dict() for key in keys(components_indexes) @@ -10,6 +27,24 @@ function build_components(X::Matrix{Tl}, coefs::Vector{Float64}, components_inde return components end +""" + build_complete_variables(estimation_ϵ::Vector{Float64}, coefs::Vector{Float64}, X::Matrix{Tl}, valid_indexes::Vector{Int64}, T::Int64) -> Tuple{Vector{Float64}, Vector{Float64}} where Tl + + Builds complete residuals and fit in sample. Residuals will contain nan values for non valid indexes. Fit in Sample will be a vector of fitted values computed from input data and coefficients (non valid indexes will also be calculated via interpolation). + + # Arguments + - `estimation_ϵ::Vector{Float64}`: Vector of estimation errors. + - `coefs::Vector{Float64}`: Coefficients. + - `X::Matrix{Tl}`: Input matrix. + - `valid_indexes::Vector{Int64}`: Valid indexes. + - `T::Int64`: Length of the original time series. + + # Returns + - Tuple containing: + - `ϵ::Vector{Float64}`: Vector containing NaN values filled with estimation errors at valid indexes. + - `fitted::Vector{Float64}`: Vector of fitted values computed from input data and coefficients. + +""" function build_complete_variables(estimation_ϵ::Vector{Float64}, coefs::Vector{Float64}, X::Matrix{Tl}, valid_indexes::Vector{Int64}, T::Int64)::Tuple{Vector{Float64}, Vector{Float64}} where Tl ϵ = fill(NaN, T); ϵ[valid_indexes] = estimation_ϵ fitted = X*coefs