Refactor API v2 with descent algorithms.
geosarr committed Aug 2, 2024
1 parent 6689100 commit fb29ec2
Showing 13 changed files with 70 additions and 337 deletions.
2 changes: 1 addition & 1 deletion src/first_order.rs
@@ -2,4 +2,4 @@ mod adaptive_descent;
mod macros;
mod steepest_descent;

pub use steepest_descent::{descent, Descent, DescentParameter};
pub use steepest_descent::{descent, Armijo, DescentParameter, PowellWolfe};
4 changes: 2 additions & 2 deletions src/first_order/adaptive_descent.rs
@@ -1,8 +1,8 @@
mod adadelta;
mod adagrad;

pub(crate) use adadelta::adadelta;
pub(crate) use adagrad::adagrad;
pub(crate) use adadelta::AdaDelta;
pub(crate) use adagrad::AdaGrad;

pub(crate) const ACCUM_GRAD: &str = "accum_grad";
pub(crate) const ACCUM_UPDATE: &str = "accum_update";
30 changes: 4 additions & 26 deletions src/first_order/adaptive_descent/adadelta.rs
@@ -1,45 +1,23 @@
use core::ops::{Add, Div, Mul};
use core::ops::{Add, Mul};

use crate::{
first_order::{
adaptive_descent::{ACCUM_GRAD, ACCUM_UPDATE},
macros::{descent_rule, impl_iterator_descent},
macros::{descent_rule, impl_optimizer_descent},
},
traits::{VecDot, Vector},
Counter,
Counter, Optimizer,
};
use hashbrown::HashMap;
use num_traits::{Float, One};

pub(crate) fn adadelta<A, X>(
accum_grad: &mut X,
accum_update: &X,
squared_grad: &X,
gamma: A,
epsilon: A,
) -> X
where
for<'a> A: Float + Add<&'a X, Output = X> + Mul<&'a X, Output = X> + Mul<X, Output = X>,
X: Add<X, Output = X> + Div<X, Output = X> + FromIterator<A> + IntoIterator<Item = A>,
{
*accum_grad = gamma * &*accum_grad + (A::one() - gamma) * squared_grad;
(epsilon + accum_update)
.into_iter()
.map(|g| g.sqrt())
.collect::<X>()
/ (epsilon + &*accum_grad)
.into_iter()
.map(|g| g.sqrt())
.collect::<X>()
}

descent_rule!(
AdaDelta,
X,
[].into_iter().collect::<X>(),
[(ACCUM_GRAD, X::zero(1)), (ACCUM_UPDATE, X::zero(1))].into()
);
impl_iterator_descent!(AdaDelta, X);
impl_optimizer_descent!(AdaDelta, X);

impl<'a, X, F, G> AdaDelta<'a, X, F, G, X>
where
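The free adadelta helper removed above computed the AdaDelta step size elementwise as sqrt(epsilon + accum_update) / sqrt(epsilon + accum_grad) after decaying the squared-gradient accumulator; that logic now sits behind the descent_rule! invocation. A minimal standalone sketch of the same rule, specialized to Vec<f64> purely for illustration (the crate's generic Vector-based version differs in its trait bounds):

/// Sketch of the AdaDelta step-size rule from the removed free function,
/// specialized to Vec<f64> for illustration only.
/// `gamma` is the decay rate, `epsilon` the numerical-stability constant.
fn adadelta_step(
    accum_grad: &mut [f64],
    accum_update: &[f64],
    squared_grad: &[f64],
    gamma: f64,
    epsilon: f64,
) -> Vec<f64> {
    // accum_grad <- gamma * accum_grad + (1 - gamma) * grad^2
    for (g, sq) in accum_grad.iter_mut().zip(squared_grad) {
        *g = gamma * *g + (1.0 - gamma) * *sq;
    }
    // step size = sqrt(epsilon + accum_update) / sqrt(epsilon + accum_grad), elementwise
    accum_update
        .iter()
        .zip(accum_grad.iter())
        .map(|(u, g)| (epsilon + *u).sqrt() / (epsilon + *g).sqrt())
        .collect()
}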
22 changes: 4 additions & 18 deletions src/first_order/adaptive_descent/adagrad.rs
@@ -1,38 +1,24 @@
use core::ops::{Add, Div, Mul};
use core::ops::{Add, Mul};

use num_traits::Float;

use crate::{
first_order::{
adaptive_descent::ACCUM_GRAD,
macros::{descent_rule, impl_iterator_descent},
macros::{descent_rule, impl_optimizer_descent},
},
traits::{VecDot, Vector},
Counter,
Counter, Optimizer,
};
use hashbrown::HashMap;

pub(crate) fn adagrad<A, X>(accum_grad: &mut X, squared_grad: X, gamma: A, epsilon: A) -> X
where
for<'a> A: Float + Add<&'a X, Output = X> + Div<X, Output = X>,
for<'b> &'b X: Add<X, Output = X>,
X: FromIterator<A> + IntoIterator<Item = A> + Clone,
{
*accum_grad = &*accum_grad + squared_grad;
gamma
/ (epsilon + &*accum_grad)
.into_iter()
.map(|g| g.sqrt())
.collect::<X>()
}

descent_rule!(
AdaGrad,
X,
[].into_iter().collect::<X>(),
[(ACCUM_GRAD, X::zero(1))].into()
);
impl_iterator_descent!(AdaGrad, X);
impl_optimizer_descent!(AdaGrad, X);

impl<'a, X, F, G> AdaGrad<'a, X, F, G, X>
where
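Likewise, the removed adagrad helper accumulated squared gradients and returned gamma / sqrt(epsilon + accum_grad) elementwise, and is now generated through descent_rule!. A minimal sketch, again specialized to Vec<f64> for illustration only:

/// Sketch of the AdaGrad step-size rule from the removed free function,
/// specialized to Vec<f64> for illustration only.
fn adagrad_step(
    accum_grad: &mut [f64],
    squared_grad: &[f64],
    gamma: f64,
    epsilon: f64,
) -> Vec<f64> {
    // accum_grad <- accum_grad + grad^2
    for (g, sq) in accum_grad.iter_mut().zip(squared_grad) {
        *g += *sq;
    }
    // step size = gamma / sqrt(epsilon + accum_grad), elementwise
    accum_grad
        .iter()
        .map(|g| gamma / (epsilon + *g).sqrt())
        .collect()
}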
29 changes: 24 additions & 5 deletions src/first_order/macros.rs
@@ -72,16 +72,16 @@ macro_rules! descent_rule {
};
}

macro_rules! impl_iterator_descent {
macro_rules! impl_optimizer_descent {
($rule:ident, $step:ty) => {
impl<'a, X, F, G> core::iter::Iterator for $rule<'a, X, F, G, $step>
where
X: Vector + VecDot<Output = X::Elem>,
X: Vector + VecDot<Output = X::Elem> + Clone,
for<'b> &'b X: Add<X, Output = X> + Mul<&'b X, Output = X>,
F: Fn(&X) -> X::Elem,
G: Fn(&X) -> X,
{
type Item = X::Elem;
type Item = X;
fn next(&mut self) -> Option<Self::Item> {
if self.stop() {
None
@@ -91,12 +91,31 @@ macro_rules! impl_iterator_descent {
self.counter.iter += 1;
self.neg_gradfx = -self.grad(&self.x);
self.counter.gcalls += 1;
Some(self.stop_metrics)
Some(self.x.clone())
}
}
}
impl<'a, X, F, G> Optimizer for $rule<'a, X, F, G, $step>
where
X: Vector + VecDot<Output = X::Elem> + Clone,
for<'b> &'b X: Add<X, Output = X> + Mul<&'b X, Output = X>,
F: Fn(&X) -> X::Elem,
G: Fn(&X) -> X,
{
type Iterate = X;
type Intermediate = HashMap<&'a str, $step>;
fn nb_iter(&self) -> usize {
self.counter.iter
}
fn iterate(&self) -> X {
self.x.clone()
}
fn intermediate(&self) -> Self::Intermediate {
HashMap::from([("sigma", self.sigma.clone())])
}
}
};
}

pub(crate) use descent_rule;
pub(crate) use impl_iterator_descent;
pub(crate) use impl_optimizer_descent;
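The renamed impl_optimizer_descent! macro now makes each descent rule both an Iterator whose items are the iterates themselves (rather than the stopping metric) and an Optimizer exposing nb_iter, iterate, and an intermediate map holding the current step size "sigma". A toy illustration of that dual Iterator/accessor pattern, using made-up types rather than the crate's, minimizing f(x) = x^2 with a fixed step size:

// Toy illustration of the pattern generated by impl_optimizer_descent!:
// a descent rule is an Iterator over iterates and also exposes
// optimizer-style accessors. Not the crate's actual types.
struct ToyRule {
    x: f64,
    iter: usize,
    gtol: f64,
}

impl Iterator for ToyRule {
    type Item = f64;
    fn next(&mut self) -> Option<f64> {
        let grad = 2.0 * self.x; // gradient of f(x) = x^2
        if grad.abs() <= self.gtol {
            return None; // converged: stop yielding, like the generated impls
        }
        self.x -= 0.1 * grad; // fixed step size stands in for sigma
        self.iter += 1;
        Some(self.x) // yield the new iterate, as the macro now does
    }
}

impl ToyRule {
    fn nb_iter(&self) -> usize {
        self.iter
    }
    fn iterate(&self) -> f64 {
        self.x
    }
}

fn main() {
    let mut rule = ToyRule { x: 1.0, iter: 0, gtol: 1e-6 };
    let last = rule.by_ref().last(); // run to convergence
    println!("{} iterations, last iterate {:?}", rule.nb_iter(), last);
}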
190 changes: 15 additions & 175 deletions src/first_order/steepest_descent.rs
@@ -1,18 +1,17 @@
mod armijo;
mod powell_wolfe;
mod unit_test;
use crate::first_order::adaptive_descent::{adadelta, adagrad};
use crate::first_order::adaptive_descent::{AdaDelta, AdaGrad};
use crate::{
traits::{VecDot, Vector},
Number, Optimizer, TuutalError,
};
use armijo::armijo;
pub use armijo::Armijo;
use core::{
fmt::Debug,
ops::{Add, Mul},
};
use num_traits::One;
use powell_wolfe::powell_wolfe;
pub use powell_wolfe::PowellWolfe;

/// Parameters used in a descent method.
///
@@ -154,14 +153,6 @@ where
assert!(beta > T::zero());
Self::AdaDelta { gamma, beta }
}
fn step_size_is_scalar(&self) -> bool {
match self {
Self::Armijo { gamma: _, beta: _ } => true,
Self::PowellWolfe { gamma: _, beta: _ } => true,
Self::AdaGrad { gamma: _, beta: _ } => false,
Self::AdaDelta { gamma: _, beta: _ } => false,
}
}
}

/// A descent algorithm using some step size method.
Expand Down Expand Up @@ -221,7 +212,7 @@ pub fn descent<X, F, G>(
gradf: G,
x0: &X,
params: &DescentParameter<X::Elem>,
eps: X::Elem,
gtol: X::Elem,
maxiter: usize,
) -> Result<X, TuutalError<X>>
where
@@ -230,169 +221,18 @@ where
F: Fn(&X) -> X::Elem,
G: Fn(&X) -> X,
{
let mut desc = Descent::new(f, gradf, x0.clone(), *params, eps);
desc.optimize(Some(maxiter))
}

/// Represents the sequence of iterates computed by a steepest descent algorithm.
pub struct Descent<X, F, G>
where
X: Vector,
F: Fn(&X) -> X::Elem,
G: Fn(&X) -> X,
{
f: F,
gradf: G,
params: DescentParameter<X::Elem>,
x: X,
eps: X::Elem,
iter: usize,
sigma: X,
accum_grad: X,
accum_update: X,
fcalls: usize,
}

impl<X, F, G> Descent<X, F, G>
where
X: Vector,
F: Fn(&X) -> X::Elem,
G: Fn(&X) -> X,
{
pub fn new(f: F, gradf: G, x: X, params: DescentParameter<X::Elem>, eps: X::Elem) -> Self
where
X: Vector,
for<'a> &'a X: Add<X, Output = X>,
{
let dim = x.len();
if params.step_size_is_scalar() {
Self {
f,
gradf,
params,
x,
iter: 0,
eps,
sigma: X::zero(1),
accum_grad: X::zero(1),
accum_update: X::zero(1),
fcalls: 0,
}
} else {
Self {
f,
gradf,
params,
x,
iter: 0,
eps,
sigma: X::zero(dim),
accum_grad: X::zero(dim),
accum_update: X::zero(dim),
fcalls: 0,
}
match params {
DescentParameter::Armijo { gamma, beta } => {
Armijo::new(f, gradf, x0.clone(), *gamma, *beta, gtol).optimize(Some(maxiter))
}
}
/// Reference to the objective function
pub(crate) fn obj(&self) -> &F {
&self.f
}
/// Reference to the gradient of the objective function
pub(crate) fn grad_obj(&self) -> &G {
&self.gradf
}
/// Number of iterations done so far.
pub fn nb_iter(&self) -> usize {
self.iter
}
/// Current iterate.
pub fn x(&self) -> &X {
&self.x
}
/// Current step size.
pub fn sigma(&self) -> &X {
&self.sigma
}
}

impl<X, F, G> core::iter::Iterator for Descent<X, F, G>
where
X: Vector + Clone + VecDot<Output = X::Elem>,
for<'a> &'a X: Add<X, Output = X> + Mul<&'a X, Output = X> + Mul<X, Output = X>,
F: Fn(&X) -> X::Elem,
G: Fn(&X) -> X,
{
type Item = X;
fn next(&mut self) -> Option<Self::Item> {
let neg_gradfx = -self.grad_obj()(&self.x);
let squared_norm_2_gradfx = neg_gradfx.dot(&neg_gradfx);
if squared_norm_2_gradfx <= (self.eps * self.eps) {
self.iter += 1;
None
} else {
let mut fcalls = self.fcalls;
self.sigma = match self.params {
DescentParameter::Armijo { gamma, beta } => [armijo(
self.obj(),
&self.x,
&neg_gradfx,
squared_norm_2_gradfx,
gamma,
beta,
&mut fcalls,
)
.1]
.into_iter()
.collect::<X>(),
DescentParameter::PowellWolfe { gamma, beta } => [powell_wolfe(
(self.obj(), self.grad_obj()),
&self.x,
&neg_gradfx,
squared_norm_2_gradfx,
gamma,
beta,
&mut fcalls,
)
.1]
.into_iter()
.collect::<X>(),
DescentParameter::AdaGrad { gamma, beta } => {
let squared_grad = &neg_gradfx * &neg_gradfx;
adagrad(&mut self.accum_grad, squared_grad, gamma, beta)
}
DescentParameter::AdaDelta { gamma, beta } => {
let squared_grad = &neg_gradfx * &neg_gradfx;
let step_size = adadelta(
&mut self.accum_grad,
&self.accum_update,
&squared_grad,
gamma,
beta,
);
self.accum_update = gamma * &self.accum_update
+ (X::Elem::one() - gamma) * (&step_size * &step_size) * squared_grad;
step_size
}
};
self.x = &self.x + &self.sigma * neg_gradfx;
self.iter += 1;
Some(self.x.clone())
DescentParameter::PowellWolfe { gamma, beta } => {
PowellWolfe::new(f, gradf, x0.clone(), *gamma, *beta, gtol).optimize(Some(maxiter))
}
DescentParameter::AdaDelta { gamma, beta } => {
AdaDelta::new(f, gradf, x0.clone(), *gamma, *beta, gtol).optimize(Some(maxiter))
}
DescentParameter::AdaGrad { gamma, beta } => {
AdaGrad::new(f, gradf, x0.clone(), *gamma, *beta, gtol).optimize(Some(maxiter))
}
}
}

impl<X, F, G> Optimizer for Descent<X, F, G>
where
X: Vector + Clone + VecDot<Output = X::Elem>,
for<'a> &'a X: Add<X, Output = X> + Mul<&'a X, Output = X> + Mul<X, Output = X>,
F: Fn(&X) -> X::Elem,
G: Fn(&X) -> X,
{
type Iterate = X;
fn nb_iter(&self) -> usize {
self.nb_iter()
}
fn iterate(&self) -> X {
self.x.clone()
}
}
