This repository has been archived by the owner on Jul 16, 2021. It is now read-only.

ENH: Add Ridge Regression #169

Closed
wants to merge 2 commits
3 changes: 2 additions & 1 deletion README.md
@@ -21,7 +21,7 @@ It aims to combine speed and ease of use - without requiring a huge number of ex
This project began as a way for me to learn Rust and brush up on some less familiar machine learning algorithms and techniques.
Now the project aims to provide a complete, easy to use, machine learning library for Rust.

This library is still very much in early stages of development. Although there are a good number of algorithms, many other
things are missing. Rusty-machine is probably not the best choice for any serious projects - but hopefully that can change in the near future!

#### Contributing
@@ -56,6 +56,7 @@ This is fairly complete but there is still lots of room for optimization and we
- Gaussian Mixture Models
- Naive Bayes Classifiers
- DBSCAN
- Ridge Regression

There is also a basic `stats` module behind a feature flag.

41 changes: 1 addition & 40 deletions src/learning/lin_reg.rs → src/learning/lin_reg/lin_reg_impl.rs
@@ -1,35 +1,3 @@
//! Linear Regression module
//!
//! Contains an implementation of linear regression using
//! OLS and gradient descent optimization.
//!
//! The regressor will automatically add the intercept term
//! so you do not need to format the input matrices yourself.
//!
//! # Usage
//!
//! ```
//! use rusty_machine::learning::lin_reg::LinRegressor;
//! use rusty_machine::learning::SupModel;
//! use rusty_machine::linalg::Matrix;
//! use rusty_machine::linalg::Vector;
//!
//! let inputs = Matrix::new(4,1,vec![1.0,3.0,5.0,7.0]);
//! let targets = Vector::new(vec![1.,5.,9.,13.]);
//!
//! let mut lin_mod = LinRegressor::default();
//!
//! // Train the model
//! lin_mod.train(&inputs, &targets).unwrap();
//!
//! // Now we'll predict a new point
//! let new_point = Matrix::new(1,1,vec![10.]);
//! let output = lin_mod.predict(&new_point).unwrap();
//!
//! // Hopefully we predicted our new point correctly!
//! assert!(output[0] > 17f64, "Our regressor isn't very good!");
//! ```

use linalg::{Matrix, BaseMatrix};
use linalg::Vector;
use learning::{LearningResult, SupModel};
@@ -39,14 +7,7 @@ use learning::optim::grad_desc::GradientDesc;
use learning::optim::{OptimAlgorithm, Optimizable};
use learning::error::Error;

/// Linear Regression Model.
///
/// Contains an option for the optimized parameters.
#[derive(Debug)]
pub struct LinRegressor {
/// The parameters for the regression model.
parameters: Option<Vector<f64>>,
}
use super::LinRegressor;

impl Default for LinRegressor {
fn default() -> LinRegressor {
55 changes: 55 additions & 0 deletions src/learning/lin_reg/mod.rs
@@ -0,0 +1,55 @@
//! Linear Regression module
//!
//! Contains an implementation of linear regression using
//! OLS and gradient descent optimization.
//!
//! The regressor will automatically add the intercept term
//! so you do not need to format the input matrices yourself.
//!
//! # Usage
//!
//! ```
//! use rusty_machine::learning::lin_reg::LinRegressor;
//! use rusty_machine::learning::SupModel;
//! use rusty_machine::linalg::Matrix;
//! use rusty_machine::linalg::Vector;
//!
//! let inputs = Matrix::new(4,1,vec![1.0,3.0,5.0,7.0]);
//! let targets = Vector::new(vec![1.,5.,9.,13.]);
//!
//! let mut lin_mod = LinRegressor::default();
//!
//! // Train the model
//! lin_mod.train(&inputs, &targets).unwrap();
//!
//! // Now we'll predict a new point
//! let new_point = Matrix::new(1,1,vec![10.]);
//! let output = lin_mod.predict(&new_point).unwrap();
//!
//! // Hopefully we predicted our new point correctly!
//! assert!(output[0] > 17f64, "Our regressor isn't very good!");
//! ```

use linalg::Vector;

mod lin_reg_impl;
mod ridge_reg_impl;

/// Linear Regression Model.
///
/// Contains an option for the optimized parameters.
#[derive(Debug)]
pub struct LinRegressor {
/// The parameters for the regression model.
parameters: Option<Vector<f64>>,
}

/// Ridge Regression Model.
///
/// Contains the L2 regularization parameter (alpha) and an option for the optimized parameters.
#[derive(Debug)]
pub struct RidgeRegressor {
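/// The L2 regularization parameter.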
alpha: f64,
/// The parameters for the regression model.
parameters: Option<Vector<f64>>,
}
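A minimal usage sketch of the two regressors exposed by this module, assuming the public API added in this PR (the expected parameter values are taken from the outlier tests further below):

```rust
use rusty_machine::learning::lin_reg::{LinRegressor, RidgeRegressor};
use rusty_machine::learning::SupModel;
use rusty_machine::linalg::{Matrix, Vector};

fn main() {
    // A small dataset with one outlier (x = 4, y = 80).
    let inputs = Matrix::new(5, 1, vec![1., 2., 3., 4., 5.]);
    let targets = Vector::new(vec![10., 20., 30., 80., 50.]);

    // Ordinary least squares.
    let mut ols = LinRegressor::default();
    ols.train(&inputs, &targets).unwrap();

    // Ridge regression with alpha = 1.0 shrinks the coefficients towards zero.
    let mut ridge = RidgeRegressor::new(1.0);
    ridge.train(&inputs, &targets).unwrap();

    println!("OLS:   {:?}", ols.parameters().unwrap());   // approx. [-4.0, 14.0]
    println!("ridge: {:?}", ridge.parameters().unwrap()); // approx. [-0.18, 12.73]
}
```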
93 changes: 93 additions & 0 deletions src/learning/lin_reg/ridge_reg_impl.rs
@@ -0,0 +1,93 @@
use linalg::{Matrix, BaseMatrix, BaseMatrixMut};
use linalg::Vector;
use learning::{LearningResult, SupModel};
use learning::error::Error;

use super::RidgeRegressor;

impl Default for RidgeRegressor {
fn default() -> Self {
RidgeRegressor {
alpha: 1.0,
parameters: None
}
}
}

impl RidgeRegressor {

/// Constructs an untrained ridge regression model.
///
/// Requires the L2 regularization parameter (alpha).
///
/// # Examples
///
/// ```
/// use rusty_machine::learning::lin_reg::RidgeRegressor;
///
/// let model = RidgeRegressor::new(1.0);
/// ```
pub fn new(alpha: f64) -> Self {
assert!(alpha >= 0., "alpha must be greater than or equal to 0.");
RidgeRegressor {
alpha: alpha,
parameters: None
}
}

/// Get the parameters from the model.
///
/// Returns an option that is None if the model has not been trained.
pub fn parameters(&self) -> Option<&Vector<f64>> {
self.parameters.as_ref()
}
}

impl SupModel<Matrix<f64>, Vector<f64>> for RidgeRegressor {
/// Train the ridge regression model.
///
/// Takes training data and output values as input.
///
/// # Examples
///
/// ```
/// use rusty_machine::learning::lin_reg::RidgeRegressor;
/// use rusty_machine::linalg::Matrix;
/// use rusty_machine::linalg::Vector;
/// use rusty_machine::learning::SupModel;
///
/// let mut model = RidgeRegressor::default();
/// let inputs = Matrix::new(3, 1, vec![2.0, 3.0, 4.0]);
/// let targets = Vector::new(vec![5.0, 10.0, 7.0]);
///
/// model.train(&inputs, &targets).unwrap();
/// ```
fn train(&mut self, inputs: &Matrix<f64>, targets: &Vector<f64>) -> LearningResult<()> {

let ones = Matrix::<f64>::ones(inputs.rows(), 1);
let full_inputs = ones.hcat(inputs);
let xt = full_inputs.transpose();
// do not regularize the intercept: zero its entry in the penalty matrix
let mut eye = Matrix::<f64>::identity(inputs.cols() + 1);
unsafe {
*eye.get_unchecked_mut([0, 0]) = 0.
}
let left = &xt * full_inputs + eye * self.alpha;
let right = &xt * targets;
self.parameters = Some(left.solve(right).expect("Unable to solve linear equation."));
Ok(())
}

/// Predict output value from input data.
///
/// Model must be trained before prediction can be made.
fn predict(&self, inputs: &Matrix<f64>) -> LearningResult<Vector<f64>> {
if let Some(ref v) = self.parameters {
let ones = Matrix::<f64>::ones(inputs.rows(), 1);
let full_inputs = ones.hcat(inputs);
Ok(full_inputs * v)
} else {
Err(Error::new_untrained())
}
}
}
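For reference, `train` above solves the ridge-regularized normal equations in closed form. A sketch of the system, with $X$ the input matrix augmented with a leading column of ones, $y$ the targets, and $I'$ the identity matrix whose top-left entry is zeroed so the intercept is not penalized:

$$(X^{\top} X + \alpha I')\,\hat{\beta} = X^{\top} y, \qquad I' = \operatorname{diag}(0, 1, \dots, 1).$$

Setting $\alpha = 0$ reduces this to the ordinary least-squares normal equations, which is what `test_ridge_regression_datasets_trees_alpha00` below relies on.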
1 change: 1 addition & 0 deletions src/lib.rs
@@ -25,6 +25,7 @@
//! - Gaussian Mixture Models
//! - Naive Bayes Classifiers
//! - DBSCAN
//! - Ridge Regression
//!
//! ### linalg
//!
121 changes: 118 additions & 3 deletions tests/learning/lin_reg.rs
@@ -1,9 +1,13 @@
use rm::linalg::Matrix;
use rm::linalg::Vector;
use rm::learning::SupModel;
use rm::learning::lin_reg::LinRegressor;
use rm::learning::lin_reg::{LinRegressor, RidgeRegressor};
use libnum::abs;

// **********************************************
// Linear Regression
// **********************************************

#[test]
fn test_optimized_regression() {
let mut lin_mod = LinRegressor::default();
@@ -16,7 +20,7 @@ fn test_optimized_regression() {
}

#[test]
fn test_regression() {
fn test_linear_regression() {
let mut lin_mod = LinRegressor::default();
let inputs = Matrix::new(3, 1, vec![2.0, 3.0, 4.0]);
let targets = Vector::new(vec![5.0, 6.0, 7.0]);
@@ -32,6 +36,18 @@ fn test_regression() {
assert!(err_2 < 1e-8);
}

#[test]
fn test_linear_regression_outlier() {
let mut lin_mod = LinRegressor::default();
let inputs = matrix![1.; 2.; 3.; 4.; 5.];
let targets = Vector::new(vec![10., 20., 30., 80., 50.]);

lin_mod.train(&inputs, &targets).unwrap();

let parameters = lin_mod.parameters().unwrap();
assert_eq!(parameters, &Vector::new(vec![-4.000000000000008, 14.000000000000002]))
}
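The expected parameters can be verified by hand from the normal equations (a sketch; $X$ is the $5 \times 2$ design matrix with a prepended ones column):

$$X^{\top}X = \begin{pmatrix} 5 & 15 \\ 15 & 55 \end{pmatrix}, \quad X^{\top}y = \begin{pmatrix} 190 \\ 710 \end{pmatrix}, \quad \hat{\beta} = \frac{1}{50}\begin{pmatrix} 55 & -15 \\ -15 & 5 \end{pmatrix}\begin{pmatrix} 190 \\ 710 \end{pmatrix} = \begin{pmatrix} -4 \\ 14 \end{pmatrix},$$

matching the asserted values up to floating-point error.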

#[test]
#[should_panic]
fn test_no_train_params() {
@@ -51,7 +67,7 @@ fn test_no_train_predict() {

#[cfg(feature = "datasets")]
#[test]
fn test_regression_datasets_trees() {
fn test_linear_regression_datasets_trees() {
use rm::datasets::trees;
let trees = trees::load();

@@ -70,4 +86,103 @@ fn test_regression_datasets_trees() {
44.87770231764652, 50.942867757643015, 52.223751092491256, 53.42851282520877,
53.899328875510534, 53.899328875510534, 68.51530482306926];
assert_eq!(predicted, Vector::new(expected));
}


// **********************************************
// Ridge Regression
// **********************************************

#[test]
fn test_ridge_regression_outlier() {
let mut model = RidgeRegressor::default();
let inputs = matrix![1.; 2.; 3.; 4.; 5.];
let targets = Vector::new(vec![10., 20., 30., 80., 50.]);

model.train(&inputs, &targets).unwrap();

let parameters = model.parameters().unwrap();
assert_eq!(parameters, &Vector::new(vec![-0.18181818181820594, 12.727272727272734]));

let mut model = RidgeRegressor::new(0.1);
let inputs = matrix![1.; 2.; 3.; 4.; 5.];
let targets = Vector::new(vec![10., 20., 30., 80., 50.]);
model.train(&inputs, &targets).unwrap();
let parameters = model.parameters().unwrap();
assert_eq!(parameters, &Vector::new(vec![-3.5841584158415647, 13.861386138613856]));
}
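The same hand check works for the ridge fits above (a sketch). With $\alpha = 1$ and the intercept left unpenalized:

$$X^{\top}X + I' = \begin{pmatrix} 5 & 15 \\ 15 & 56 \end{pmatrix}, \quad \hat{\beta} = \frac{1}{55}\begin{pmatrix} 56 & -15 \\ -15 & 5 \end{pmatrix}\begin{pmatrix} 190 \\ 710 \end{pmatrix} = \begin{pmatrix} -2/11 \\ 140/11 \end{pmatrix} \approx \begin{pmatrix} -0.1818 \\ 12.7273 \end{pmatrix},$$

and with $\alpha = 0.1$ the determinant becomes $50.5$, giving $(-181/50.5,\; 700/50.5) \approx (-3.5842,\; 13.8614)$. Both match the asserted values.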

#[test]
#[should_panic]
fn test_ridge_regression_invalid_alpha() {
RidgeRegressor::new(-1.0);
}


#[cfg(feature = "datasets")]
#[test]
fn test_ridge_regression_datasets_trees() {
use rm::datasets::trees;
let trees = trees::load();

let mut model = RidgeRegressor::default();
model.train(&trees.data(), &trees.target()).unwrap();
let params = model.parameters().unwrap();
assert_eq!(params, &Vector::new(vec![-58.09806161950894, 4.68684745409343, 0.34441921086952676]));

let predicted = model.predict(&trees.data()).unwrap();
let expected = vec![4.9121170103334, 4.596075192213792, 4.844606261293432, 15.912019831077998, 19.949162219722425,
21.106685386870826, 16.18892829290755, 19.288701190733292, 21.479481990490267, 20.226070681551974,
22.072432270439435, 21.5078593832402, 21.5078593832402, 20.502979143381534, 23.975548644826723,
27.84929214264129, 31.637903462206083, 33.85706165471298, 30.565512473307443, 28.623262742630104,
34.38250118562216, 36.00870909817989, 35.34824806919077, 41.68968082859186, 44.81783111916752,
50.882355416739074, 52.16414411842728, 53.35004467832559, 53.818729423734936, 53.818729423734936,
68.41546728046455];
assert_eq!(predicted, Vector::new(expected));
}

#[cfg(feature = "datasets")]
#[test]
fn test_ridge_regression_datasets_trees_alpha01() {
use rm::datasets::trees;
let trees = trees::load();

let mut model = RidgeRegressor::new(0.1);
model.train(&trees.data(), &trees.target()).unwrap();
let params = model.parameters().unwrap();
assert_eq!(params, &Vector::new(vec![-57.99878658933356, 4.706019761728981, 0.3397708268791373]));

let predicted = model.predict(&trees.data()).unwrap();
let expected = vec![4.84513531455659, 4.558087108679594, 4.819749407267118, 15.877920444118622, 19.877061838376648,
21.027205468307827, 16.19230536370829, 19.250242805620523, 21.419698916189105, 20.19144675796631,
22.02113204165577, 21.472421537191252, 21.472421537191252, 20.50583167755598, 23.956262567349498,
27.85190952602645, 31.589388621696962, 33.81156735326769, 30.597412854772223, 28.689619042791165,
34.38761457144487, 36.00836017754894, 35.38154114479282, 41.761029133628014, 44.871689196542405,
50.936792265787936, 52.21776704501286, 53.420633295946175, 53.891235272119076, 53.891235272119076,
68.50528244076838];
assert_eq!(predicted, Vector::new(expected));
}

#[cfg(feature = "datasets")]
#[test]
fn test_ridge_regression_datasets_trees_alpha00() {
// should be the same as LinRegressor
use rm::datasets::trees;
let trees = trees::load();

let mut model = RidgeRegressor::new(0.0);
model.train(&trees.data(), &trees.target()).unwrap();
let params = model.parameters().unwrap();
assert_eq!(params, &Vector::new(vec![-57.98765891838409, 4.708160503017506, 0.3392512342447438]));

let predicted = model.predict(&trees.data()).unwrap();
let expected = vec![4.837659653793278, 4.55385163347481, 4.816981265588826, 15.874115228921276,
19.869008437727473, 21.018326956518717, 16.192688074961563, 19.245949183164257,
21.413021404689726, 20.187581283767756, 22.015402271048487, 21.468464618616007,
21.468464618616007, 20.50615412980805, 23.954109686181766, 27.852202904652785,
31.583966481344966, 33.806481916796706, 30.60097760433255, 28.697035014921106,
34.388184394951004, 36.008318964043994, 35.38525970948079, 41.76899799551756,
44.87770231764652, 50.942867757643015, 52.223751092491256, 53.42851282520877,
53.899328875510534, 53.899328875510534, 68.51530482306926];
assert_eq!(predicted, Vector::new(expected));
}