-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Dynamic Programming c++ implementation #9
base: main
Are you sure you want to change the base?
Changes from 5 commits
bb10165
d13dfc7
5bbbaf2
930430a
379897a
e35cff1
e8ccb7c
fccca35
0c64950
fe6bac1
245a92f
0af1b08
66e2104
a4cff6f
9ccf1fe
8da0fe0
943a6c0
98ee45c
744b928
b8dd0f2
c7e6920
7771157
f7287f8
47aedc0
4fd3a58
4dd27f8
f51ff1c
c0afe6f
77e3e00
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
"""Implements dynamic programming class for optimal segementation algorithm.""" | ||
import _DynP | ||
import _dupin | ||
import numpy as np | ||
|
||
|
||
|
@@ -18,13 +18,37 @@ class DynP: | |
min_size: int | ||
Minimum size of a segment. Changing will not provide optimal | ||
detection, but will reduce runtime. | ||
|
||
|
||
Methods | ||
------- | ||
__init__(self, data: np.ndarray, num_bkps: int, jump: int, min_size: int) | ||
Initializes the DynamicProgramming instance with the time series data | ||
and parameters. | ||
set_num_threads(self, num_threads: int) | ||
Sets the number of threads to be used for parallel computation. | ||
fit(self, num_bkps: int) -> list | ||
Calculates the cost matrix and identifies the optimal breakpoints in | ||
the time series data. | ||
|
||
Example Usage | ||
------------- | ||
>>> import numpy as np | ||
>>> from dynp import DynP | ||
>>> data = np.random.rand(100, 1) # Simulated time series data | ||
>>> num_bkps = 3 # Number of breakpoints to detect | ||
>>> jump = 1 # Interval for checking potential breakpoints | ||
>>> min_size = 3 # Minimum size of a segment | ||
>>> model = DynP(data, num_bkps, jump, min_size) | ||
>>> breakpoints = model.fit(num_bkps) | ||
>>> print(breakpoints) | ||
""" | ||
|
||
def __init__( | ||
self, data: np.ndarray, num_bkps: int, jump: int, min_size: int | ||
): | ||
"""Initialize the DynamicProgramming instance with given parameters.""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you document a class's |
||
self.dynp = _DynP.DynamicProgramming(data, num_bkps, jump, min_size) | ||
self._dupin = _dupin.DynamicProgramming(data, num_bkps, jump, min_size) | ||
|
||
def set_num_threads(self, num_threads: int): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Long term dev wise, if additional CPP methods will be added we will most likely want num_threads to be controlled on the level of the whole module? Would preprocessor be a good place to have this? @b-butler thoughts? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Adding this to as a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so I would just move this function to util.py right? would just have to import _DynP in util.py? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes |
||
"""Set the number of threads for parallelization. | ||
|
@@ -35,9 +59,9 @@ def set_num_threads(self, num_threads: int): | |
The number of threads to use during computation. Default | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not a parameter default. If you want to add a comment about the default parallelization it should go above this section. Also, the message should state that we use all available cores unless set otherwise. |
||
is determined automatically. | ||
""" | ||
self.dynp.set_threads(num_threads) | ||
self._dupin.set_threads(num_threads) | ||
|
||
def fit(self, num_bkps: int) -> list: | ||
def fit(self, num_breakpoints: int) -> list[int]: | ||
"""Calculate the cost matrix and return the breakpoints. | ||
|
||
Parameters | ||
|
@@ -49,4 +73,4 @@ def fit(self, num_bkps: int) -> list: | |
------- | ||
list: A list of integers representing the breakpoints. | ||
""" | ||
return self.dynp.fit() | ||
return self._dupin.fit(num_breakpoints) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,22 +13,22 @@ using namespace std; | |
using namespace Eigen; | ||
|
||
DynamicProgramming::DynamicProgramming() | ||
: num_bkps(1), num_parameters(0), num_timesteps(0), jump(1), min_size(3) {} | ||
: num_features(0), num_timesteps(0), jump(1), min_size(3), cost_matrix(0) {} | ||
|
||
DynamicProgramming::DynamicProgramming(const Eigen::MatrixXd &data, int num_bkps_, | ||
int jump_, int min_size_) | ||
: data(data), num_bkps(num_bkps_), | ||
jump(jump_), min_size(min_size_) { | ||
|
||
DynamicProgramming::DynamicProgramming(const Eigen::MatrixXd &data, | ||
int jump_, int min_size_) | ||
: data(data), jump(jump_), min_size(min_size_), cost_matrix(data.rows()) { | ||
num_timesteps = data.rows(); | ||
num_parameters = data.cols(); | ||
num_features = data.cols(); | ||
} | ||
|
||
void DynamicProgramming::scale_data() { | ||
Eigen::VectorXd min_val = data.colwise().minCoeff(); | ||
Eigen::VectorXd max_val = data.colwise().maxCoeff(); | ||
Eigen::VectorXd range = max_val - min_val; | ||
|
||
for (int j = 0; j < num_parameters; ++j) { | ||
for (int j = 0; j <num_features; ++j) { | ||
if (range(j) == 0.0) { | ||
data.col(j).setZero(); | ||
} else { | ||
|
@@ -42,51 +42,58 @@ void DynamicProgramming::regression_setup(linear_fit_struct &lfit) { | |
lfit.y = data; | ||
} | ||
|
||
Eigen::VectorXd DynamicProgramming::regression_line(int start, int end, int dim, | ||
linear_fit_struct &lfit) { | ||
int n = end - start; | ||
Eigen::VectorXd x = lfit.x.segment(start, n); | ||
Eigen::VectorXd y = lfit.y.col(dim).segment(start, n); | ||
//work in progress, the rowwise colwise is messing up | ||
Eigen::MatrixXd DynamicProgramming::regression_lines(int start, int end, linear_fit_struct &lfit) { | ||
int n = end - start; | ||
Eigen::VectorXd x = lfit.x.segment(start, n); | ||
Eigen::MatrixXd y = lfit.y.block(start, 0, n, num_features); | ||
|
||
// Ensure x is in a two-dimensional form for broadcasting | ||
Eigen::MatrixXd x_matrix = x.replicate(1, num_features); | ||
|
||
// Calculate means | ||
double x_mean = x.mean(); | ||
Eigen::VectorXd y_mean = y.colwise().mean(); | ||
|
||
// Center the data around 0 | ||
Eigen::MatrixXd x_centered = x_matrix.colwise() - Eigen::VectorXd::Constant(n, x_mean); | ||
Eigen::MatrixXd y_centered = y.rowwise() - y_mean.transpose(); | ||
|
||
double x_mean = x.mean(); | ||
double y_mean = y.mean(); | ||
// Calculate slopes for each feature | ||
Eigen::VectorXd slope = (x_centered.array() * y_centered.array()).colwise().sum() / x_centered.array().square().sum(); | ||
|
||
Eigen::VectorXd x_centered = x.array() - x_mean; | ||
Eigen::VectorXd y_centered = y.array() - y_mean; | ||
// Calculate intercepts for each feature | ||
Eigen::VectorXd intercept = y_mean.array() - slope.array() * x_mean; | ||
|
||
double slope = x_centered.dot(y_centered) / x_centered.squaredNorm(); | ||
double intercept = y_mean - slope * x_mean; | ||
// everything till this line is functioning fine; I might be overcomplicating it | ||
Eigen::MatrixXd regression_lines = (x_matrix.array().colwise() - x_mean).colwise() * slope.array() + intercept.transpose().array(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you just need to do something like |
||
|
||
return x.unaryExpr( | ||
[slope, intercept](double xi) { return slope * xi + intercept; }); | ||
return regression_lines; | ||
} | ||
|
||
double DynamicProgramming::l2_cost(Eigen::MatrixXd &predicted_y, int start, int end) { | ||
Eigen::MatrixXd diff = predicted_y.block(start, 0, end - start, num_parameters) - | ||
data.block(start, 0, end - start, num_parameters); | ||
return std::sqrt(diff.array().square().sum()); | ||
double DynamicProgramming::l2_cost(const Eigen::MatrixXd &predicted_y, int start, int end) { | ||
Eigen::MatrixXd diff = predicted_y.block(start, 0, end - start, num_features) - | ||
data.block(start, 0, end - start, num_features); | ||
return std::sqrt(diff.array().square().sum()); | ||
} | ||
|
||
Eigen::MatrixXd DynamicProgramming::predicted(int start, int end, | ||
linear_fit_struct &lfit) { | ||
Eigen::MatrixXd predicted_y(num_timesteps, num_parameters); | ||
for (int i = 0; i < num_parameters; ++i) { | ||
predicted_y.block(start, i, end - start, 1) = | ||
regression_line(start, end, i, lfit); | ||
} | ||
return predicted_y; | ||
void DynamicProgramming::predicted(int start, int end, linear_fit_struct &lfit, | ||
Eigen::MatrixXd &predicted_y) { | ||
predicted_y.block(start, 0, end - start, num_features) = regression_lines(start, end, lfit); | ||
} | ||
|
||
double DynamicProgramming::cost_function(int start, int end) { | ||
linear_fit_struct lfit; | ||
regression_setup(lfit); | ||
Eigen::MatrixXd predicted_y = predicted(start, end, lfit); | ||
|
||
Eigen::MatrixXd predicted_y(num_timesteps, num_features); | ||
predicted(start, end, lfit, predicted_y); // Fill the predicted_y matrix | ||
|
||
return l2_cost(predicted_y, start, end); | ||
} | ||
|
||
void DynamicProgramming::initialize_cost_matrix() { | ||
scale_data(); | ||
cost_matrix.initialize(num_timesteps); | ||
tbb::parallel_for(tbb::blocked_range<int>(0, num_timesteps), | ||
[&](const tbb::blocked_range<int> &r) { | ||
for (int i = r.begin(); i < r.end(); ++i) { | ||
|
@@ -112,7 +119,9 @@ std::pair<double, std::vector<int>> DynamicProgramming::seg(int start, int end, | |
std::pair<double, std::vector<int>> best = {std::numeric_limits<double>::infinity(), {}}; | ||
|
||
for (int bkp = start + min_size; bkp < end; bkp++) { | ||
if ((bkp - start) >= min_size && (end - bkp) >= min_size) { | ||
if ((bkp - start) < min_size || (end - bkp) < min_size) { | ||
continue; | ||
} | ||
auto left = seg(start, bkp, num_bkps - 1); | ||
auto right = seg(bkp, end, 0); | ||
double cost = left.first + right.first; | ||
|
@@ -130,21 +139,17 @@ std::pair<double, std::vector<int>> DynamicProgramming::seg(int start, int end, | |
return best; | ||
} | ||
|
||
std::vector<int> DynamicProgramming::compute_breakpoints() { | ||
std::vector<int> DynamicProgramming::compute_breakpoints(int num_bkps) { | ||
auto result = seg(0, num_timesteps - 1, num_bkps); | ||
std::vector<int> breakpoints = result.second; | ||
std::sort(breakpoints.begin(), breakpoints.end()); | ||
breakpoints.erase(std::unique(breakpoints.begin(), breakpoints.end()), | ||
breakpoints.end()); | ||
return breakpoints; | ||
} | ||
|
||
std::vector<int> DynamicProgramming::fit(int num_bkps_in){ | ||
num_bkps = num_bkps_in; | ||
std::vector<int> DynamicProgramming::fit(int num_bkps){ | ||
if (!cost_computed){ | ||
initialize_cost_matrix(); | ||
} | ||
return compute_breakpoints(); | ||
return compute_breakpoints(num_bkps); | ||
} | ||
|
||
void set_parallelization(int num_threads) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These get documented by the method's docstring itself.