diff --git a/Cargo.lock b/Cargo.lock index fd48ec2..7ba84a9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -100,7 +100,7 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "atsc" -version = "0.5.0" +version = "0.7.0" dependencies = [ "average", "bincode", @@ -1414,7 +1414,7 @@ checksum = "9dcc60c0624df774c82a0ef104151231d37da4962957d691c011c852b2473314" [[package]] name = "vsri" -version = "0.1.0" +version = "0.2.0" dependencies = [ "chrono", "log", diff --git a/README.md b/README.md index e30d6dc..855dc25 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,10 @@ ## Table of Contents 1. [TL;DR;](#tldr) -2. [Documentation](#documentation) -3. [Building ATSC](#building-atsc) -4. [What is ATSC?](#what-is-atsc) -5. [Where does ATSC fit?](#where-does-atsc-fit) +2. [What is ATSC?](#what-is-atsc) +3. [When to use ATSC?](#when-to-use-atsc) +4. [Documentation](#documentation) +5. [Building ATSC](#building-atsc) 6. [ATSC Usage](#atsc-usage) 7. [Releases](#releases) 8. [Roadmap](#roadmap) @@ -18,31 +18,14 @@ The fastest way to test ATSC is with a CSV file! 1. Download the latest [release](https://github.com/instaclustr/atsc/releases) -2. Get a CSV file with the proper format (or get one from [tests folder](https://github.com/instaclustr/atsc/tree/main/atsc/tests/csv)) -3. Run it +2. Pick a CSV file from [tests folder](https://github.com/instaclustr/atsc/tree/main/atsc/tests/csv) (Those will have the expected internal format). +3. Execute the following command: -```bash -cargo run --release -- --csv -``` - -## Documentation - -For full documentation please go to [Docs](https://github.com/instaclustr/atsc/tree/main/docs) - -## Building ATSC - -1. Clone the repository: - - ```bash - git clone https://github.com/instaclustr/atsc - cd atsc - ``` + ```bash + cargo run --release -- --csv + ``` -2. Build the project: - - ```bash - cargo build --release - ``` +4. You have a compressed timeseries! ## What is ATSC? 
@@ -52,10 +35,10 @@ This way, ATSC only needs to store the parametrization of the function and not t ATSC draws inspiration from established compression and signal analysis techniques, achieving significant compression ratios. -In internal testing ATSC compressed from 46 times to 880 times the time series of our databases with a fitting error within 1% of the original time-series. +In internal testing ATSC compressed the monitoring time series of our databases by 46x to 880x with a fitting error within 1% of the original time series. In some cases, ATSC would produce highly compressed data without any data loss (Perfect fitting functions). -ATSC is meant to be used in long term storage of time series, as it benefits from more points to do a better fitting. +ATSC is meant to be used with long term storage of time series, as it benefits from more points to do a better fitting. The decompression of data is faster (up to 40x) vs a slower compression speed, as it is expected that the data might be compressed once and decompressed several times. @@ -68,9 +51,9 @@ Internally ATSC uses the following methods for time series fitting: For a more detailed insight into ATSC read the paper here: [ATSC - A novel approach to time-series compression](https://github.com/instaclustr/atsc/tree/main/paper/ATCS-AdvancedTimeSeriesCompressor.pdf) -Currently, ATSC uses an internal format to process time series (WBRO) and outputs a compressed format (BRO). A CSV to WBRO format is available here: [CSV Compressor](https://github.com/instaclustr/atsc/tree/main/csv-compressor) +ATSC input can be an internal format developed to process time series (WBRO), or a CSV. It outputs a compressed format (BRO). A CSV to WBRO converter is available here: [CSV Compressor](https://github.com/instaclustr/atsc/tree/main/csv-compressor) -## Where does ATSC fit? +## When to use ATSC? ATSC fits in any place that needs space reduction in trade for precision. ATSC is to time series what JPG/MP3 is to image/audio. 
@@ -83,11 +66,30 @@ Example of use cases: * Long, slow moving data series (e.g. Weather data). Those will most probably follow an easy to fit pattern * Data that is meant to be visualized by humans and not machine processed (e.g. Operation teams). With such a small error, under 1%, it shouldn't impact analysis. +## Documentation + +For full documentation please go to [Docs](https://github.com/instaclustr/atsc/tree/main/docs) + +## Building ATSC + +1. Clone the repository: + + ```bash + git clone https://github.com/instaclustr/atsc + cd atsc + ``` + +2. Build the project: + + ```bash + cargo build --release + ``` + ## ATSC Usage ### Prerequisites -* Ensure you have [Rust](https://www.rust-lang.org/tools/install) and Cargo installed on your system. +* Ensure you have [Rust](https://www.rust-lang.org/tools/install) installed on your system. ### Usage @@ -153,6 +155,20 @@ atsc -u ## Releases +### v0.7 - 20/11/2024 + +* Added CSV Support +* Greatly improved documentation +* Improved Benchmark and testing +* Improved FFT compression +* Improved Polynomial compression +* Demo files and generation scripts +* Several fixes and cleanups + +### v0.6 - 09/11/2024 + +* Internal release + ### v0.5 - 30/11/2023 * Added Polynomial Compressor (with 2 variants) diff --git a/atsc/Cargo.toml b/atsc/Cargo.toml index c9bb4c8..6f3472c 100644 --- a/atsc/Cargo.toml +++ b/atsc/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "atsc" -version = "0.5.0" +version = "0.7.0" authors = ["Carlos Rolo "] edition = "2021" license = "Apache-2.0" diff --git a/atsc/demo/README.md b/atsc/demo/README.md new file mode 100644 index 0000000..98623e1 --- /dev/null +++ b/atsc/demo/README.md @@ -0,0 +1,53 @@ +# Demos + +## What are demos? + +ATSC is a lossy compressor tool, as such, the output time series will not match the input one. + +With this in mind it could be important to visualize the difference between output and input and verify if the error introduced is within the expectations. 
+ +So demos were created with the intent of having a quick way to display input vs output for a provided metric. The demo scripts will run the ATSC compressor with 2 different error levels and with all the available compression options. + +The output HTML files (for error levels 1% and 3%) can then be used to visualize and compare the different results with the input file and evaluate the result of ATSC compression. + +Three pairs of demo output files are provided so that a comparison can be made without needing to run the compressor even once if a quick evaluation is needed, or if you are curious what this is all about! + +## What is in each file? + +Each HTML file renders the output of all compressor options (`FFT`, `IDW` and `Polynomial`) with an error indicated in the file name (1% or 3%). Also, the input data is rendered. + +On top, it is possible to click on each option to hide or display it and have a visual comparison between options. + +## Contents + +This folder contains scripts to generate the demo `html` files. + +The demo scripts generate 2 comparison files. One for all compressors (`FFT`, `IDW` and `Polynomial`) with an error of 1% and another with a 3% error. + +In this folder there are 3 comparisons from some of the available uncompressed files in [tests folder](https://github.com/instaclustr/atsc/tree/v0.7/atsc/tests). + +The files are the following: + +* IOWait metrics with 1% and 3% error from a CSV file + * comparison-error-1-csv-iowait.html + * comparison-error-3-csv-iowait.html +* Java Heap Usage metrics with 1% and 3% error from a wbro file + * comparison-error-1-heap.html + * comparison-error-3-heap.html +* OS Memory Usage metrics with 1% and 3% error from a wbro file + * comparison-error-1-memory.html + * comparison-error-3-memory.html + +## Create your own demo files + +1. Change into the demo directory: + + ```bash + cd atsc/demo + ``` + +2. Execute the Demo. 
**Note**: If using a `wbro` file, run `run_demo.sh`, if using a `csv` file run `run_demo_csv.sh` + + ```bash + ./run_demo_csv.sh INPUT_FILE + ``` diff --git a/atsc/demo/comparison-error-1-csv-iowait.html b/atsc/demo/comparison-error-1-csv-iowait.html new file mode 100644 index 0000000..1b6d96a --- /dev/null +++ b/atsc/demo/comparison-error-1-csv-iowait.html @@ -0,0 +1,54 @@ + + + + + +Comparison Error Level 1 + + + + + + + + diff --git a/atsc/demo/comparison-error-1-heap.html b/atsc/demo/comparison-error-1-heap.html new file mode 100644 index 0000000..3e38c62 --- /dev/null +++ b/atsc/demo/comparison-error-1-heap.html @@ -0,0 +1,54 @@ + + + + + +Comparison Error Level 1 + + + + + + + + diff --git a/atsc/demo/comparison-error-1-memory.html b/atsc/demo/comparison-error-1-memory.html new file mode 100644 index 0000000..8f8c8b8 --- /dev/null +++ b/atsc/demo/comparison-error-1-memory.html @@ -0,0 +1,36 @@ + + + + + +Comparison Error Level 1 + + + + + + + diff --git a/atsc/demo/comparison-error-3-csv-iowait.html b/atsc/demo/comparison-error-3-csv-iowait.html new file mode 100644 index 0000000..fbebeec --- /dev/null +++ b/atsc/demo/comparison-error-3-csv-iowait.html @@ -0,0 +1,54 @@ + + + + + +Comparison Error Level 3 + + + + + + + + diff --git a/atsc/demo/comparison-error-3-heap.html b/atsc/demo/comparison-error-3-heap.html new file mode 100644 index 0000000..1620a23 --- /dev/null +++ b/atsc/demo/comparison-error-3-heap.html @@ -0,0 +1,54 @@ + + + + + +Comparison Error Level 3 + + + + + + + + diff --git a/atsc/demo/comparison-error-3-memory.html b/atsc/demo/comparison-error-3-memory.html new file mode 100644 index 0000000..08846db --- /dev/null +++ b/atsc/demo/comparison-error-3-memory.html @@ -0,0 +1,36 @@ + + + + + +Comparison Error Level 3 + + + + + + + diff --git a/atsc/src/main.rs b/atsc/src/main.rs index 695d925..c93acf3 100644 --- a/atsc/src/main.rs +++ b/atsc/src/main.rs @@ -177,11 +177,11 @@ struct Args { #[arg(long, value_enum, default_value = "auto")] 
compressor: CompressorType, - /// Sets the maximum allowed error for the compressed data, must be between 0 and 50. Default is 5 (5%). + /// Sets the maximum allowed error for the compressed data, must be between 0 and 50. Default is 3 (3%). /// 0 is lossless compression /// 50 will do a median filter on the data. /// In between will pick optimize for the error - #[arg(short, long, default_value_t = 5, value_parser = clap::value_parser!(u8).range(0..51), verbatim_doc_comment )] + #[arg(short, long, default_value_t = 3, value_parser = clap::value_parser!(u8).range(0..51), verbatim_doc_comment )] error: u8, /// Uncompresses the input file/directory diff --git a/atsc/tests/README.md b/atsc/tests/README.md new file mode 100644 index 0000000..e385a1d --- /dev/null +++ b/atsc/tests/README.md @@ -0,0 +1,7 @@ +# Integration and End-to-End tests + +## Contents + +Test files (End-to-end and integration) and input for those tests. + +The `csv` folder contains `csv` formatted input and the `wbros` folder contains the [WBRO](https://github.com/instaclustr/atsc/tree/main/wavbrro) formatted input. 
diff --git a/csv-compressor/Cargo.toml b/csv-compressor/Cargo.toml index 8a30324..d52bf48 100644 --- a/csv-compressor/Cargo.toml +++ b/csv-compressor/Cargo.toml @@ -7,13 +7,13 @@ description = "Utilizes ATSC functionalities to compress CSV formatted data" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -atsc = { version = "0.5.0", path = "../atsc" } +atsc = { version = "0.7.0", path = "../atsc" } clap = { workspace = true, features = ["derive"] } csv = "1.3.0" serde = { version = "1.0.171", features = ["derive"] } wavbrro = { version = "0.1.0", path = "../wavbrro" } log = "0.4.19" env_logger = "0.11.0" -vsri = { version = "0.1.0", path = "../vsri" } +vsri = { version = "0.2.0", path = "../vsri" } [dev-dependencies] tempdir = "0.3.7" diff --git a/docs/getting-started.md b/docs/getting-started.md index 8eba513..d1abf3c 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -22,7 +22,13 @@ How to build and/or run ATSC **Note**: ATSC was originally built with a specific format in mind `WBRO`. For information about this format and how to use it check the [WAVBRRO](https://github.com/instaclustr/atsc/tree/main/wavbrro) library documentation. -5. Run it +5. Move the `atsc` executable + + ```bash + mv target/release/atsc atsc + ``` + +6. Execute it ```bash atsc --csv diff --git a/vsri/Cargo.toml b/vsri/Cargo.toml index b5bbaa0..4475545 100644 --- a/vsri/Cargo.toml +++ b/vsri/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "vsri" -version = "0.1.0" +version = "0.2.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html