From 58a18c03586ec17566c5291a9800f033c4f24094 Mon Sep 17 00:00:00 2001 From: Carlos Rolo <3799585+cjrolo@users.noreply.github.com> Date: Thu, 19 Oct 2023 12:06:28 +0100 Subject: [PATCH 1/7] Chunk sizes calculations --- brro-compressor/src/main.rs | 5 ++- brro-compressor/src/optimizer/mod.rs | 52 ++++++++++++++++++---------- brro-compressor/src/utils/mod.rs | 10 +++++- 3 files changed, 45 insertions(+), 22 deletions(-) diff --git a/brro-compressor/src/main.rs b/brro-compressor/src/main.rs index 53076cd..7b85179 100644 --- a/brro-compressor/src/main.rs +++ b/brro-compressor/src/main.rs @@ -84,8 +84,7 @@ fn process_single_file(arguments: &Args) -> Result<(), std::io::Error> { fn compress_data(vec: &Vec, tag: &MetricTag, arguments: &Args) -> Vec { debug!("Compressing data!"); let optimizer_results = optimizer::process_data(vec, tag); - let _optimizer_results_f: Vec = optimizer_results.iter().map(|&x| x as f64).collect(); - + debug!("Samples in: {}, Samples out: {}", vec.len(), optimizer_results.len()); let mut cs = CompressedStream::new(); let compressor = match arguments.compressor { CompressorType::Noop => Compressor::Noop, @@ -96,7 +95,7 @@ fn compress_data(vec: &Vec, tag: &MetricTag, arguments: &Args) -> Vec { CompressorType::Wavelet => Compressor::Wavelet }; - cs.compress_chunk_with(vec, compressor); + cs.compress_chunk_with(&optimizer_results, compressor); cs.to_bytes() } diff --git a/brro-compressor/src/optimizer/mod.rs b/brro-compressor/src/optimizer/mod.rs index 685e98c..c662a86 100644 --- a/brro-compressor/src/optimizer/mod.rs +++ b/brro-compressor/src/optimizer/mod.rs @@ -6,6 +6,15 @@ use log::debug; use types::metric_tag::MetricTag; use crate::types; +/// Max Frame size, this can aprox. 36h of data at 1point/sec rate, a little more than 1 week at 1point/5sec +/// and 1 month (30 days) at 1 point/20sec. +/// This would be aprox. 1MB of Raw data (131072 * 64bits). 
+/// We wouldn't want to decompressed a ton of uncessary data, but for historical view of the data, looking into 1day/week/month at once is very reasonable +const MAX_FRAME_SIZE: usize = 131072; // 2^17 +/// The Min frame size is one that allows our compressors potentially achieve 100x compression. Currently the most +/// limited one is the FFT compressor, that needs 3 frequencies at minimum, 3x100 = 300, next power of 2 is 512. +const MIN_FRAME_SIZE: usize = 512; // 2^9 + impl MetricTag { #[allow(clippy::wrong_self_convention)] fn from_float(&self, x: f64) -> i64 { @@ -46,25 +55,32 @@ fn to_median_filter(data: &Vec) -> Vec { filtered } +/// This function gets a length and returns a vector with the chunk sizes to feed to the different compressors +/// A lot of assumptions go into selecting the chunk size, including: +/// 1. Collection rate - It is not expected that the collection rate exceeds 1point sec (it is expected actually less) +/// 2. Maximum compression achievable - A compressed frame as overhead and a minimum number of segments, small frames don't allow great compressions +/// 3. FFT operates faster under power of 2 +fn get_chunks_sizes(len: usize) -> Vec { + Vec::::with_capacity(MIN_FRAME_SIZE) +} + /// This should look at the data and return an optimized dataset for a specific compressor, /// If a compressor is hand picked, this should be skipped. -/// TODO: Make it do that -pub fn process_data(wav_data: &Vec, tag: &MetricTag) -> Vec { - let mut _bitdepth = 64; - let mut _dc_component: i64 = 0; - let mut _fractional = true; - - debug!("Tag: {:?}", tag); - let data = match tag { - MetricTag::Other => Vec::new(), - MetricTag::QuasiRandom => to_median_filter(wav_data), - _ => { - wav_data - .iter() - .map(|x| tag.from_float(*x)) +pub fn process_data(wav_data: &Vec, tag: &MetricTag) -> Vec { + // My idea here: + // 1. Clean data + // 2. Split into good sized chunks (aka power of 2) + // 3. Get each chunk into the compressor that it should go + // 3.1. 
Chunks should be at least of a size that it can allow a 100x compression for that given compressor (FFT is 512) + let len = wav_data.len(); + if !len.is_power_of_two() { + todo!() + } + // Cleaning data, removing NaN, etc. This might reduce sample count + debug!("Tag: {:?} Len: {}", tag, wav_data.len()); + // Is len a power of 2? If not try to get the previous power of 2 + wav_data.iter() + .filter(|x| !(x.is_nan() || x.is_infinite())) + .copied() .collect() - } - }; - _fractional = false; - data } \ No newline at end of file diff --git a/brro-compressor/src/utils/mod.rs b/brro-compressor/src/utils/mod.rs index 540e337..3aaf196 100644 --- a/brro-compressor/src/utils/mod.rs +++ b/brro-compressor/src/utils/mod.rs @@ -1,4 +1,12 @@ pub mod reader; pub mod writer; pub mod error; -mod file_type_detector; \ No newline at end of file +mod file_type_detector; + +// Is this the right place? +pub const fn prev_power_of_two(n: u64) -> u64 { + // n = 0 gives highest_bit_set_idx = 0. + let highest_bit_set_idx = 63 - (n|1).leading_zeros(); + // Binary AND of highest bit with n is a no-op, except zero gets wiped. + (1 << highest_bit_set_idx) & n +} \ No newline at end of file From dda913d5c66f992a5bae868bc6fc62dfc8be516b Mon Sep 17 00:00:00 2001 From: Carlos Rolo <3799585+cjrolo@users.noreply.github.com> Date: Thu, 19 Oct 2023 14:18:06 +0100 Subject: [PATCH 2/7] WIP in optimization --- brro-compressor/src/optimizer/mod.rs | 123 +++++++++++++++++++++------ brro-compressor/src/utils/mod.rs | 2 +- 2 files changed, 100 insertions(+), 25 deletions(-) diff --git a/brro-compressor/src/optimizer/mod.rs b/brro-compressor/src/optimizer/mod.rs index c662a86..6739741 100644 --- a/brro-compressor/src/optimizer/mod.rs +++ b/brro-compressor/src/optimizer/mod.rs @@ -4,7 +4,7 @@ use median::Filter; use log::debug; use types::metric_tag::MetricTag; -use crate::types; +use crate::{types, utils::prev_power_of_two, compressor::Compressor}; /// Max Frame size, this can aprox. 
36h of data at 1point/sec rate, a little more than 1 week at 1point/5sec /// and 1 month (30 days) at 1 point/20sec. @@ -15,6 +15,84 @@ const MAX_FRAME_SIZE: usize = 131072; // 2^17 /// limited one is the FFT compressor, that needs 3 frequencies at minimum, 3x100 = 300, next power of 2 is 512. const MIN_FRAME_SIZE: usize = 512; // 2^9 +// My idea here: +// 1. Clean data +// 2. Split into good sized chunks (aka power of 2) +// 3. Get each chunk into the compressor that it should go +// 3.1. Chunks should be at least of a size that it can allow a 100x compression for that given compressor (FFT is 512) +// 4. From the clean data and chunk sizes, assign an optimizer for each chunk +struct OptimizerPlan { + pub data: Vec, + pub chunk_sizes: Vec, + pub compressors: Vec, +} + +impl OptimizerPlan { + pub fn create_plan(data: Vec) -> Self { + let c_data = OptimizerPlan::clean_data(&data); + let chunks = OptimizerPlan::get_chunks_sizes(c_data.len()); + let optimizer = OptimizerPlan::assign_compressor(&c_data, &chunks, None); + OptimizerPlan { data: c_data, + chunk_sizes: chunks, + compressors: optimizer } + } + + pub fn create_plan_bounded(data: Vec, max_error: f32) -> Self { + let c_data = OptimizerPlan::clean_data(&data); + let chunks = OptimizerPlan::get_chunks_sizes(c_data.len()); + let optimizer = OptimizerPlan::assign_compressor(&c_data, &chunks, Some(max_error)); + OptimizerPlan { data: c_data, + chunk_sizes: chunks, + compressors: optimizer } + } + + /// Removes NaN and infinite references from the data + pub fn clean_data(wav_data: &Vec) -> Vec { + // Cleaning data, removing NaN, etc. This might reduce sample count + wav_data.iter() + .filter(|x| !(x.is_nan() || x.is_infinite())) + .copied() + .collect() + } + + /// This function gets a length and returns a vector with the chunk sizes to feed to the different compressors + /// A lot of assumptions go into selecting the chunk size, including: + /// 1. 
Collection rate - It is not expected that the collection rate exceeds 1point sec (it is expected actually less) + /// 2. Maximum compression achievable - A compressed frame as overhead and a minimum number of segments, small frames don't allow great compressions + /// 3. FFT operates faster under power of 2 + fn get_chunks_sizes(mut len: usize) -> Vec { + let mut chunk_sizes = Vec::::new(); + while len > 0 { + match len { + _ if len >= MAX_FRAME_SIZE => { + chunk_sizes.push(MAX_FRAME_SIZE); + len -= MAX_FRAME_SIZE; + }, + _ if len <= MIN_FRAME_SIZE => { + chunk_sizes.push(len); + len = 0; + }, + _ => { + let size = prev_power_of_two(len); + chunk_sizes.push(size); + len -= size; + } + } + } + chunk_sizes + } + + /// Assigns a compressor to a chunk of data + fn assign_compressor(clean_data: &Vec, chunks: &Vec, max_error: Option) -> Vec { + let selection = Vec::with_capacity(chunks.len()); + match max_error { + Some(err) => todo!(), + None => return selection, + } + } + +} + impl MetricTag { #[allow(clippy::wrong_self_convention)] fn from_float(&self, x: f64) -> i64 { @@ -55,32 +133,29 @@ fn to_median_filter(data: &Vec) -> Vec { filtered } -/// This function gets a length and returns a vector with the chunk sizes to feed to the different compressors -/// A lot of assumptions go into selecting the chunk size, including: -/// 1. Collection rate - It is not expected that the collection rate exceeds 1point sec (it is expected actually less) -/// 2. Maximum compression achievable - A compressed frame as overhead and a minimum number of segments, small frames don't allow great compressions -/// 3. FFT operates faster under power of 2 -fn get_chunks_sizes(len: usize) -> Vec { - Vec::::with_capacity(MIN_FRAME_SIZE) -} - /// This should look at the data and return an optimized dataset for a specific compressor, /// If a compressor is hand picked, this should be skipped. pub fn process_data(wav_data: &Vec, tag: &MetricTag) -> Vec { - // My idea here: - // 1. Clean data - // 2. 
Split into good sized chunks (aka power of 2) - // 3. Get each chunk into the compressor that it should go - // 3.1. Chunks should be at least of a size that it can allow a 100x compression for that given compressor (FFT is 512) - let len = wav_data.len(); - if !len.is_power_of_two() { - todo!() - } - // Cleaning data, removing NaN, etc. This might reduce sample count debug!("Tag: {:?} Len: {}", tag, wav_data.len()); - // Is len a power of 2? If not try to get the previous power of 2 wav_data.iter() - .filter(|x| !(x.is_nan() || x.is_infinite())) - .copied() - .collect() + .filter(|x| !(x.is_nan() || x.is_infinite())) + .copied() + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_chunks_sizes() { + let len_very_large: usize = 131072 * 3 + 1765; + let len_small: usize = 31; + let len_right_sized: usize = 2048; + let len_some_size: usize = 12032; + assert_eq!(OptimizerPlan::get_chunks_sizes(len_very_large), [131072, 131072, 131072, 1024, 512, 229]); + assert_eq!(OptimizerPlan::get_chunks_sizes(len_small), [31]); + assert_eq!(OptimizerPlan::get_chunks_sizes(len_right_sized), [2048]); + assert_eq!(OptimizerPlan::get_chunks_sizes(len_some_size), [8192, 2048, 1024, 512, 256]); + } } \ No newline at end of file diff --git a/brro-compressor/src/utils/mod.rs b/brro-compressor/src/utils/mod.rs index 3aaf196..4d28aef 100644 --- a/brro-compressor/src/utils/mod.rs +++ b/brro-compressor/src/utils/mod.rs @@ -4,7 +4,7 @@ pub mod error; mod file_type_detector; // Is this the right place? -pub const fn prev_power_of_two(n: u64) -> u64 { +pub fn prev_power_of_two(n: usize) -> usize { // n = 0 gives highest_bit_set_idx = 0. let highest_bit_set_idx = 63 - (n|1).leading_zeros(); // Binary AND of highest bit with n is a no-op, except zero gets wiped. 
From 7f891cd05d8a5b049139b40fc3cb3cee5c2b68aa Mon Sep 17 00:00:00 2001 From: Carlos Rolo <3799585+cjrolo@users.noreply.github.com> Date: Thu, 19 Oct 2023 16:55:49 +0100 Subject: [PATCH 3/7] Moved code around. Implemented Optimizer struct --- Cargo.lock | 34 +++++++++++ brro-compressor/Cargo.toml | 2 +- brro-compressor/src/optimizer/mod.rs | 81 ++++++++++--------------- brro-compressor/src/types/metric_tag.rs | 42 +++++++++++++ brro-compressor/src/utils/mod.rs | 7 +++ 5 files changed, 115 insertions(+), 51 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a650878..a4f090a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -118,6 +118,17 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "average" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d804c74bb2d66e9b7047658d21af0f1c937d7d2466410cbf1aed3b0c04048d4" +dependencies = [ + "easy-cast", + "float-ord", + "num-traits", +] + [[package]] name = "backtrace" version = "0.3.68" @@ -189,6 +200,7 @@ dependencies = [ name = "brro-compressor" version = "0.1.0" dependencies = [ + "average", "bincode", "clap", "env_logger", @@ -387,6 +399,15 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "347675b2993d588e8506457ea2de0e64a89ad0fcbc0e79d07d25f50542f40b59" +[[package]] +name = "easy-cast" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10936778145f3bea71fd9bf61332cce28c28e96a380714f7ab34838b80733fd6" +dependencies = [ + "libm", +] + [[package]] name = "either" version = "1.8.1" @@ -451,6 +472,12 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "float-ord" +version = "0.3.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ce81f49ae8a0482e4c55ea62ebbd7e5a686af544c00b9d090bba3ff9be97b3d" + [[package]] name = "fnv" version = "1.0.7" @@ -824,6 +851,12 @@ version = "0.2.147" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + [[package]] name = "linux-raw-sys" version = "0.3.8" @@ -971,6 +1004,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" dependencies = [ "autocfg", + "libm", ] [[package]] diff --git a/brro-compressor/Cargo.toml b/brro-compressor/Cargo.toml index 19f642f..b82e3fb 100644 --- a/brro-compressor/Cargo.toml +++ b/brro-compressor/Cargo.toml @@ -14,7 +14,7 @@ clap = {version = "4.3.14", features = ["derive"] } bincode = "2.0.0-rc.3" rustfft = "6.1.0" tempfile = "3.2" - +average = "0.14.1" regex = "1.9.1" hound = "3.5" median = "0.3.2" \ No newline at end of file diff --git a/brro-compressor/src/optimizer/mod.rs b/brro-compressor/src/optimizer/mod.rs index 6739741..d628c64 100644 --- a/brro-compressor/src/optimizer/mod.rs +++ b/brro-compressor/src/optimizer/mod.rs @@ -1,10 +1,6 @@ -// Lucas - Once the project is far enough along I strongly reccomend reenabling dead code checks -#![allow(dead_code)] - -use median::Filter; use log::debug; use types::metric_tag::MetricTag; -use crate::{types, utils::prev_power_of_two, compressor::Compressor}; +use crate::{types, utils::{prev_power_of_two, f64_to_u64}, compressor::Compressor}; /// Max Frame size, this can aprox. 36h of data at 1point/sec rate, a little more than 1 week at 1point/5sec /// and 1 month (30 days) at 1 point/20sec. 
@@ -21,6 +17,7 @@ const MIN_FRAME_SIZE: usize = 512; // 2^9 // 3. Get each chunk into the compressor that it should go // 3.1. Chunks should be at least of a size that it can allow a 100x compression for that given compressor (FFT is 512) // 4. From the clean data and chunk sizes, assign an optimizer for each chunk +#[derive(Debug, Clone)] struct OptimizerPlan { pub data: Vec, pub chunk_sizes: Vec, @@ -28,7 +25,7 @@ struct OptimizerPlan { } impl OptimizerPlan { - pub fn create_plan(data: Vec) -> Self { + pub fn plan(data: Vec) -> Self { let c_data = OptimizerPlan::clean_data(&data); let chunks = OptimizerPlan::get_chunks_sizes(c_data.len()); let optimizer = OptimizerPlan::assign_compressor(&c_data, &chunks, None); @@ -37,7 +34,7 @@ impl OptimizerPlan { compressors: optimizer } } - pub fn create_plan_bounded(data: Vec, max_error: f32) -> Self { + pub fn plan_bounded(data: Vec, max_error: f32) -> Self { let c_data = OptimizerPlan::clean_data(&data); let chunks = OptimizerPlan::get_chunks_sizes(c_data.len()); let optimizer = OptimizerPlan::assign_compressor(&c_data, &chunks, Some(max_error)); @@ -82,55 +79,30 @@ impl OptimizerPlan { chunk_sizes } + /// Walks the data, checks how much variability is in the data, and assigns a compressor based on that + /// NOTE: Is this any good? 
+ fn best_compressor(data: &[f64]) -> Compressor { + let _ = data.iter().map(|&f| f64_to_u64(f, 0)); + // For now, let's just return FFT + Compressor::FFT + } + /// Assigns a compressor to a chunk of data fn assign_compressor(clean_data: &Vec, chunks: &Vec, max_error: Option) -> Vec { - let selection = Vec::with_capacity(chunks.len()); + let mut selection = Vec::with_capacity(chunks.len()); match max_error { - Some(err) => todo!(), - None => return selection, - } - } - -} - -impl MetricTag { - #[allow(clippy::wrong_self_convention)] - fn from_float(&self, x: f64) -> i64 { - match self { - MetricTag::Other => { - 0 - } - MetricTag::NotFloat | MetricTag::QuasiRandom => { - x as i64 - } - MetricTag::Percent(y) => { - to_multiply_and_truncate(x, *y) - } - MetricTag::Duration(y) => { - to_multiply_and_truncate(x, *y) - } - MetricTag::Bytes(y) => { - (x as i64) / (*y as i64) - } + Some(_err) => todo!(), + None => { + let mut s = 0; + for size in chunks.iter() { + selection.push(OptimizerPlan::best_compressor(&clean_data[s..(s+*size-1)])); + s += *size; + } + }, } + selection } -} - -/// Converts a float via multiplication and truncation -fn to_multiply_and_truncate(number: f64, mul: i32) -> i64 { - (number * mul as f64) as i64 -} -fn to_median_filter(data: &Vec) -> Vec { - let mut filtered = Vec::with_capacity(data.len()); - // 10minutes of data - let mut filter = Filter::new(50); - for point in data { - let point_int = MetricTag::QuasiRandom.from_float(*point); - let median = filter.consume(point_int); - filtered.push(median) - } - filtered } /// This should look at the data and return an optimized dataset for a specific compressor, @@ -158,4 +130,13 @@ mod tests { assert_eq!(OptimizerPlan::get_chunks_sizes(len_right_sized), [2048]); assert_eq!(OptimizerPlan::get_chunks_sizes(len_some_size), [8192, 2048, 1024, 512, 256]); } + + #[test] + fn assign_compressor() { + let fake_data = vec![12.23; 132671]; + let chunks = OptimizerPlan::get_chunks_sizes(fake_data.len()); + 
println!("{:?}", chunks); + let compressor_vec = OptimizerPlan::assign_compressor(&fake_data, &chunks, None); + assert_eq!(compressor_vec.len(), 4); + } } \ No newline at end of file diff --git a/brro-compressor/src/types/metric_tag.rs b/brro-compressor/src/types/metric_tag.rs index 4fa5236..34cc3ac 100644 --- a/brro-compressor/src/types/metric_tag.rs +++ b/brro-compressor/src/types/metric_tag.rs @@ -1,3 +1,5 @@ +use median::Filter; + #[derive(Debug)] pub enum MetricTag { Percent(i32), @@ -11,4 +13,44 @@ pub enum MetricTag { Bytes(i32), // Data that is in bytes... Make it MB, or KB Other, // Everything else +} + +impl MetricTag { + #[allow(clippy::wrong_self_convention)] + fn from_float(&self, x: f64) -> i64 { + match self { + MetricTag::Other => { + 0 + } + MetricTag::NotFloat | MetricTag::QuasiRandom => { + x as i64 + } + MetricTag::Percent(y) => { + Self::to_multiply_and_truncate(x, *y) + } + MetricTag::Duration(y) => { + Self::to_multiply_and_truncate(x, *y) + } + MetricTag::Bytes(y) => { + (x as i64) / (*y as i64) + } + } + } + + /// Converts a float via multiplication and truncation + fn to_multiply_and_truncate(number: f64, mul: i32) -> i64 { + (number * mul as f64) as i64 + } + + fn to_median_filter(data: &Vec) -> Vec { + let mut filtered = Vec::with_capacity(data.len()); + // 10minutes of data + let mut filter = Filter::new(50); + for point in data { + let point_int = MetricTag::QuasiRandom.from_float(*point); + let median = filter.consume(point_int); + filtered.push(median) + } + filtered + } } \ No newline at end of file diff --git a/brro-compressor/src/utils/mod.rs b/brro-compressor/src/utils/mod.rs index 4d28aef..7374a9b 100644 --- a/brro-compressor/src/utils/mod.rs +++ b/brro-compressor/src/utils/mod.rs @@ -9,4 +9,11 @@ pub fn prev_power_of_two(n: usize) -> usize { let highest_bit_set_idx = 63 - (n|1).leading_zeros(); // Binary AND of highest bit with n is a no-op, except zero gets wiped. 
(1 << highest_bit_set_idx) & n +} + +/// Converts a float to u64 with a given precision +pub fn f64_to_u64(number: f64, precision: usize) -> u64 { + if precision > 6 { panic!("Precision only available up to 6 digits!")} + let mul = [1, 10, 100, 1_000, 10_000, 100_000, 1_000_000][precision]; + (number * mul as f64) as u64 } \ No newline at end of file From edb14206350d1b005b0edf11f81ddcbd1368c264 Mon Sep 17 00:00:00 2001 From: Carlos Rolo <3799585+cjrolo@users.noreply.github.com> Date: Thu, 19 Oct 2023 17:42:43 +0100 Subject: [PATCH 4/7] Tests and execution plan --- brro-compressor/src/optimizer/mod.rs | 38 ++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/brro-compressor/src/optimizer/mod.rs b/brro-compressor/src/optimizer/mod.rs index d628c64..65c4857 100644 --- a/brro-compressor/src/optimizer/mod.rs +++ b/brro-compressor/src/optimizer/mod.rs @@ -25,6 +25,8 @@ struct OptimizerPlan { } impl OptimizerPlan { + + /// Creates an optimal data compression plan pub fn plan(data: Vec) -> Self { let c_data = OptimizerPlan::clean_data(&data); let chunks = OptimizerPlan::get_chunks_sizes(c_data.len()); @@ -34,7 +36,9 @@ impl OptimizerPlan { compressors: optimizer } } + /// Creates an optimal plan for compression for the data set provided bound by a given error pub fn plan_bounded(data: Vec, max_error: f32) -> Self { + // TODO: Check error limits let c_data = OptimizerPlan::clean_data(&data); let chunks = OptimizerPlan::get_chunks_sizes(c_data.len()); let optimizer = OptimizerPlan::assign_compressor(&c_data, &chunks, Some(max_error)); @@ -43,8 +47,14 @@ impl OptimizerPlan { compressors: optimizer } } + /// Sets a given compressor for all data chunks + pub fn set_compressor(&mut self, compressor: Compressor) { + let new_compressors = vec![compressor; self.compressors.len()]; + self.compressors = new_compressors; + } + /// Removes NaN and infinite references from the data - pub fn clean_data(wav_data: &Vec) -> Vec { + pub fn 
clean_data(wav_data: &[f64]) -> Vec { // Cleaning data, removing NaN, etc. This might reduce sample count wav_data.iter() .filter(|x| !(x.is_nan() || x.is_infinite())) @@ -79,23 +89,34 @@ impl OptimizerPlan { chunk_sizes } + /// Returns an iterator with the data slice and the compressor associated + pub fn get_execution(&self) -> Vec<(&Compressor, &[f64])> { + let mut output = Vec::with_capacity(self.chunk_sizes.len()); + let mut s = 0; + for (i,size) in self.chunk_sizes.iter().enumerate() { + output.push((&self.compressors[i] ,&self.data[s..(s+*size)])); + s += *size; + } + output + } + /// Walks the data, checks how much variability is in the data, and assigns a compressor based on that /// NOTE: Is this any good? - fn best_compressor(data: &[f64]) -> Compressor { + fn get_compressor(data: &[f64]) -> Compressor { let _ = data.iter().map(|&f| f64_to_u64(f, 0)); // For now, let's just return FFT Compressor::FFT } /// Assigns a compressor to a chunk of data - fn assign_compressor(clean_data: &Vec, chunks: &Vec, max_error: Option) -> Vec { + fn assign_compressor(clean_data: &[f64], chunks: &Vec, max_error: Option) -> Vec { let mut selection = Vec::with_capacity(chunks.len()); match max_error { Some(_err) => todo!(), None => { let mut s = 0; for size in chunks.iter() { - selection.push(OptimizerPlan::best_compressor(&clean_data[s..(s+*size-1)])); + selection.push(OptimizerPlan::get_compressor(&clean_data[s..(s+*size)])); s += *size; } }, @@ -119,6 +140,14 @@ pub fn process_data(wav_data: &Vec, tag: &MetricTag) -> Vec { mod tests { use super::*; + #[test] + fn optimizer() { + let fake_data = vec![12.23; 2049]; + let op = OptimizerPlan::plan(fake_data); + let plan_vec = op.get_execution(); + assert_eq!(plan_vec.len(), 2); + } + #[test] fn test_get_chunks_sizes() { let len_very_large: usize = 131072 * 3 + 1765; @@ -135,7 +164,6 @@ mod tests { fn assign_compressor() { let fake_data = vec![12.23; 132671]; let chunks = OptimizerPlan::get_chunks_sizes(fake_data.len()); - 
println!("{:?}", chunks); let compressor_vec = OptimizerPlan::assign_compressor(&fake_data, &chunks, None); assert_eq!(compressor_vec.len(), 4); } From 310ce1d041ab4e213efbe2fa651a4645d144e32f Mon Sep 17 00:00:00 2001 From: Carlos Rolo <3799585+cjrolo@users.noreply.github.com> Date: Thu, 19 Oct 2023 19:06:05 +0100 Subject: [PATCH 5/7] small fix --- brro-compressor/src/utils/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/brro-compressor/src/utils/mod.rs b/brro-compressor/src/utils/mod.rs index 7374a9b..e2e0cb8 100644 --- a/brro-compressor/src/utils/mod.rs +++ b/brro-compressor/src/utils/mod.rs @@ -13,6 +13,7 @@ pub fn prev_power_of_two(n: usize) -> usize { /// Converts a float to u64 with a given precision pub fn f64_to_u64(number: f64, precision: usize) -> u64 { + // TODO: Panic on overflow if precision > 6 { panic!("Precision only available up to 6 digits!")} let mul = [1, 10, 100, 1_000, 10_000, 100_000, 1_000_000][precision]; (number * mul as f64) as u64 From 037544afd09386a9bbae5f8c6f8d2af84a979689 Mon Sep 17 00:00:00 2001 From: Carlos Rolo <3799585+cjrolo@users.noreply.github.com> Date: Thu, 19 Oct 2023 19:12:25 +0100 Subject: [PATCH 6/7] Minor fixes --- brro-compressor/src/optimizer/mod.rs | 2 +- brro-compressor/src/utils/mod.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/brro-compressor/src/optimizer/mod.rs b/brro-compressor/src/optimizer/mod.rs index 65c4857..0903a48 100644 --- a/brro-compressor/src/optimizer/mod.rs +++ b/brro-compressor/src/optimizer/mod.rs @@ -128,7 +128,7 @@ impl OptimizerPlan { /// This should look at the data and return an optimized dataset for a specific compressor, /// If a compressor is hand picked, this should be skipped. 
-pub fn process_data(wav_data: &Vec, tag: &MetricTag) -> Vec { +pub fn process_data(wav_data: &[f64], tag: &MetricTag) -> Vec { debug!("Tag: {:?} Len: {}", tag, wav_data.len()); wav_data.iter() .filter(|x| !(x.is_nan() || x.is_infinite())) diff --git a/brro-compressor/src/utils/mod.rs b/brro-compressor/src/utils/mod.rs index f5308b5..c4f2e84 100644 --- a/brro-compressor/src/utils/mod.rs +++ b/brro-compressor/src/utils/mod.rs @@ -1,6 +1,7 @@ pub mod error; -pub mod readers; pub mod writers; +pub mod readers; + mod file_type_detector; // Is this the right place? From 247d2ea0fcbe3a96642a098767ac4c4154f3f9e2 Mon Sep 17 00:00:00 2001 From: Carlos Rolo <3799585+cjrolo@users.noreply.github.com> Date: Fri, 20 Oct 2023 09:27:48 +0100 Subject: [PATCH 7/7] PR fixes --- brro-compressor/src/optimizer/mod.rs | 2 +- brro-compressor/src/types/metric_tag.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/brro-compressor/src/optimizer/mod.rs b/brro-compressor/src/optimizer/mod.rs index 0903a48..986f12c 100644 --- a/brro-compressor/src/optimizer/mod.rs +++ b/brro-compressor/src/optimizer/mod.rs @@ -109,7 +109,7 @@ impl OptimizerPlan { } /// Assigns a compressor to a chunk of data - fn assign_compressor(clean_data: &[f64], chunks: &Vec, max_error: Option) -> Vec { + fn assign_compressor(clean_data: &[f64], chunks: &[usize], max_error: Option) -> Vec { let mut selection = Vec::with_capacity(chunks.len()); match max_error { Some(_err) => todo!(), diff --git a/brro-compressor/src/types/metric_tag.rs b/brro-compressor/src/types/metric_tag.rs index 34cc3ac..03d8c3c 100644 --- a/brro-compressor/src/types/metric_tag.rs +++ b/brro-compressor/src/types/metric_tag.rs @@ -42,7 +42,7 @@ impl MetricTag { (number * mul as f64) as i64 } - fn to_median_filter(data: &Vec) -> Vec { + fn to_median_filter(data: &[f64]) -> Vec { let mut filtered = Vec::with_capacity(data.len()); // 10minutes of data let mut filter = Filter::new(50);