Merge pull request #82 from instaclustr/poly-fixes

Allowing polynomial to compress bounded
instaclustr · Nov 29, 2023 · 4292bfd · 4292bfd
2 parents 7286a87 + d17b091
commit 4292bfd
Show file tree

Hide file tree

Showing 4 changed files with 53 additions and 21 deletions.
diff --git a/brro-compressor/src/compressor/fft.rs b/brro-compressor/src/compressor/fft.rs
@@ -232,10 +232,10 @@ impl FFT {
                            .collect();
             current_err = calculate_error(data, &out_data);
             trace!("Current Err: {}", current_err);
-            // Max iterations is 18 (We start at 10%, we can go to 95% and 1% at a time)
+            // Max iterations is 22 (We start at 10%, we can go to 95% and 1% at a time)
             match iterations {
-                1..=17 => jump += max_freq/2,
-                18..=22 => jump += max_freq/10,
+                1..=17 => jump += (max_freq/2).max(1),
+                18..=22 => jump += (max_freq/10).max(1),
                 _ => break
             }
         }

diff --git a/brro-compressor/src/compressor/mod.rs b/brro-compressor/src/compressor/mod.rs
@@ -4,7 +4,7 @@ use bincode::{Decode, Encode};
 use self::constant::{constant, constant_to_data};
 use self::fft::{fft, fft_to_data, fft_allowed_error};
 use self::noop::{noop, noop_to_data};
-use self::polynomial::{polynomial, to_data, PolynomialType};
+use self::polynomial::{polynomial, to_data, PolynomialType, polynomial_allowed_error};
 
 pub mod noop;
 pub mod constant;
@@ -39,8 +39,8 @@ impl Compressor {
             Compressor::Noop => noop(data),
             Compressor::FFT => fft_allowed_error(data, max_error),
             Compressor::Constant => constant(data),
-            Compressor::Polynomial => polynomial(data, PolynomialType::Polynomial),
-            Compressor::Idw => polynomial(data, PolynomialType::Idw),
+            Compressor::Polynomial => polynomial_allowed_error(data, max_error, PolynomialType::Polynomial),
+            Compressor::Idw => polynomial_allowed_error(data, max_error, PolynomialType::Idw),
             _ => todo!(),
         }
     }

diff --git a/brro-compressor/src/compressor/polynomial.rs b/brro-compressor/src/compressor/polynomial.rs
@@ -1,4 +1,4 @@
-use crate::utils::{DECIMAL_PRECISION, error::calculate_error, round_and_limit_f64};
+use crate::utils::{DECIMAL_PRECISION, error::calculate_error, round_and_limit_f64, round_f64};
 
 use super::BinConfig;
 use bincode::{Decode, Encode};
@@ -73,6 +73,10 @@ impl Polynomial {
     }
 
     pub fn compress_bounded(&mut self, data: &[f64], max_err: f64) {
+        if self.max_value == self.min_value { 
+            debug!("Same max and min, we're done here!");
+            return
+        }
         // TODO: Big one, read below
         // To reduce error we add more points to the polynomial, but, we also might add residuals
         // each residual is 1/data_lenght * 100% less compression, each jump is 5% less compression. 
@@ -84,8 +88,10 @@ impl Polynomial {
         let mut current_err = max_err + 1.0;
         let mut jump: usize = 0;
         let mut iterations = 0;
-
-        while ((max_err * 1000.0) as i32) < ((current_err * 1000.0) as i32) {
+        // Locking max target error precision to 0.1%
+        let target_error = round_f64(max_err, 3);
+        while target_error < round_f64(current_err, 4) {
+            trace!("Method: {:?} Iterations: {} Error: {} Target: {}", method, iterations, current_err, target_error);
             iterations += 1;
             self.compress_hinted(data, baseline_points+jump);
             let out_data = match method {
@@ -98,22 +104,31 @@ impl Polynomial {
             trace!("Current Err: {}", current_err);
             // Max iterations is 18 (We start at 10%, we can go to 95% and 1% at a time)
             match iterations {
-                1..=17 => jump += baseline_points/2,
-                18..=22 => jump += baseline_points/10,
-                _ => break
+                // We should always increase by 1 in worst case
+                1..=17 => jump += (data_len/10).max(1),
+                18..=22 => jump += (data_len/100).max(1),
+                // No more jumping, but we landed right in the end
+                _ if target_error > round_f64(current_err, 4) => break,
+                // We can't hit the target, store everything
+                _ => {
+                    self.compress_hinted(data, data_len);
+                    break;
+                }
+
             }
             if self.data_points.len() == data_len {
                 // Storing the whole thing anyway...
                 break;
             }
         }
+        debug!("Final Stored Data Lenght: {}", self.data_points.len());
     } 
 
     pub fn compress_hinted(&mut self, data: &[f64], points: usize) {
         if self.max_value == self.min_value { 
             debug!("Same max and min, we're done here!");
             return
-         }
+        }
         // The algorithm is simple, Select 10% of the data points, calculate the Polynomial based on those data points
         // Plus the max and min
         let data_len = data.len();
@@ -195,10 +210,7 @@ impl Polynomial {
         let mut points: Vec<usize> = (0..frame_size).step_by(self.point_step as usize).collect();
         if  points.last() != Some(&(frame_size - 1)) { points.push(frame_size-1); }
         // If they differ, it means I added either max and/or min
-        let point_dif = self.data_points.len() - points.len();
-        // I can calculate the positions from here
-        debug!("Points diff: {}", point_dif);
-        if point_dif > 0 {
+        if self.data_points.len() != points.len() {
             let mut prev_pos = points[0];
             for (array_position, &position_value) in points.clone().iter().enumerate() {
                 if self.min_position > prev_pos && self.min_position < position_value {
@@ -234,10 +246,16 @@ impl Polynomial {
         }
         let spline = Spline::from_vec(key_vec);
         // Build the data
-        // TODO: It gives values below minimum
-        (0..frame_size)
-        .map(|f| round_and_limit_f64(spline.clamped_sample(f as f64).unwrap(), self.min_value.into(), self.max_value.into(), DECIMAL_PRECISION))
-        .collect() 
+        // There is a problem with the spline calculation, that it might get a value for all positions. In those cases
+        // we return the good value calculated. If that doesn't exist, we return the minimum value 
+        let mut out_vec = Vec::with_capacity(frame_size);
+        let mut prev = self.min_value as f64;
+        for value in 0..frame_size {
+            let spline_value = spline.clamped_sample(value as f64).unwrap_or(prev);
+            prev = spline_value;
+            out_vec.push(round_and_limit_f64(spline_value, self.min_value.into(), self.max_value.into(), DECIMAL_PRECISION));
+        }
+        out_vec
     }
 
     pub fn idw_to_data(&self, frame_size: usize) -> Vec<f64> {
@@ -375,4 +393,17 @@ mod tests {
         let e = calculate_error(&vector1, &out);
         assert!(e <= 0.5);
     }
+
+    #[test]
+    fn test_line_polynomial() {
+        let vector1 = vec![1.0, 1.0, 1.0, 1.0];
+        assert_eq!(polynomial(&vector1, PolynomialType::Polynomial), [0, 0, 0, 0, 128, 63, 0, 0, 0, 128, 63, 0, 1]);
+    }
+
+    #[test]
+    fn test_line_idw() {
+        let vector1 = vec![1.0, 1.0, 1.0, 1.0];
+        assert_eq!(polynomial(&vector1, PolynomialType::Idw), [1, 0, 0, 0, 128, 63, 0, 0, 0, 128, 63, 0, 1]);
+    }
+
 }
diff --git a/brro-compressor/src/utils/error.rs b/brro-compressor/src/utils/error.rs
@@ -95,6 +95,7 @@ pub fn error_mape(original: &[f64], generated: &[f64]) -> f64 {
         .zip(generated.iter())
         .map(|(original, generated)| ((generated - original)/original).abs())
         .sum();
+    // TODO: NaN needs to be handled
     abs_error / original.len() as f64
 }