From c724a4b797965d78a8aad902fba1572c5c4f40ab Mon Sep 17 00:00:00 2001 From: JooYoung Park Date: Fri, 24 Nov 2023 02:02:13 -0700 Subject: [PATCH] fix reference traces, docs, gitignore and numpy collision Signed-off-by: JooYoung Park add finer-step reference trace Signed-off-by: JooYoung Park added number of function suggestion to cores Signed-off-by: JooYoung Park fix wordlist Signed-off-by: JooYoung Park fixed docs Signed-off-by: JooYoung Park add word to wordlist Signed-off-by: JooYoung Park fix doc Signed-off-by: JooYoung Park Update reference traces docs Signed-off-by: Leonid Kondrashov Update docs/loader.md Co-authored-by: Dmitrii Ustiugov resampled the reference traces that were sampled wrongly Signed-off-by: JooYoung Park removed statistics from sampled traces Signed-off-by: JooYoung Park --- .github/configs/wordlist.txt | 4 ++- .github/workflows/integration_tests.yaml | 2 +- .gitignore | 3 --- data/traces/reference/.gitignore | 1 + data/traces/reference/sampled_150.tar.gz | 4 +-- docs/loader.md | 4 +++ docs/sampler.md | 33 +++++++++++++++--------- requirements.txt | 4 +-- sampler/__main__.py | 1 + 9 files changed, 35 insertions(+), 21 deletions(-) create mode 100644 data/traces/reference/.gitignore diff --git a/.github/configs/wordlist.txt b/.github/configs/wordlist.txt index 6987d4d4b..653593743 100644 --- a/.github/configs/wordlist.txt +++ b/.github/configs/wordlist.txt @@ -748,4 +748,6 @@ Lazar Cvetkovic cvetkovic ethz -lazar \ No newline at end of file +lazar +xvzf +untar \ No newline at end of file diff --git a/.github/workflows/integration_tests.yaml b/.github/workflows/integration_tests.yaml index 966c992e4..201d704c0 100644 --- a/.github/workflows/integration_tests.yaml +++ b/.github/workflows/integration_tests.yaml @@ -43,7 +43,7 @@ jobs: - name: Drawing samples run: | tar -xzvf $tpath/inputs/preprocessed.tar.gz -C $tpath/inputs/ - python -m sampler sample --source_trace $tpath/inputs/preprocessed --output $tpath/sampled --min-size 10 
--step-size=10 --max-size=50 + python -m sampler sample --source_trace $tpath/inputs/preprocessed --original_trace $tpath/inputs/preprocessed --output $tpath/sampled --min-size 10 --step-size=10 --max-size=50 # - name: Plotting results # run: | diff --git a/.gitignore b/.gitignore index fcb07a3e5..169aab07f 100644 --- a/.gitignore +++ b/.gitignore @@ -5,10 +5,7 @@ analysis tmp data/out data/azure -data/traces/* !data/traces/example/ -data/traces/reference/*/*.csv -!data/traces/reference/ pkg/generator/*.png pkg/generator/*.txt pkg/driver/*.csv diff --git a/data/traces/reference/.gitignore b/data/traces/reference/.gitignore new file mode 100644 index 000000000..16f2dc5fa --- /dev/null +++ b/data/traces/reference/.gitignore @@ -0,0 +1 @@ +*.csv \ No newline at end of file diff --git a/data/traces/reference/sampled_150.tar.gz b/data/traces/reference/sampled_150.tar.gz index b3cdc048b..f99ae58f4 100644 --- a/data/traces/reference/sampled_150.tar.gz +++ b/data/traces/reference/sampled_150.tar.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2c1718c0b19e45f3001836806f6c968214ffe5764ae3e5d2e894eeab99d9d2f2 -size 139036379 +oid sha256:338d824b5208bf39dce5276918e0b240c193154baa28eba552856b0d0d496398 +size 139414130 diff --git a/docs/loader.md b/docs/loader.md index 7b26b4862..5ad1ca9f3 100644 --- a/docs/loader.md +++ b/docs/loader.md @@ -124,6 +124,10 @@ For to configure the workload for load generator, please refer to `docs/configur There are a couple of constants that should not be exposed to the users. They can be examined and changed in `pkg/common/constants.go`. +Sample sizes appropriate for performance evaluation vary depending on the platform. +As a starting point for fine-tuning, we suggest at most 5 functions per core with SMT disabled. +For example, 80 functions for a 16-core node. With larger sample sizes, trace replaying may lead to failures in function invocations. 
+ ## Build the image for a synthetic function The reason for existence of Firecracker and container version is because of different ports for gRPC server. Firecracker diff --git a/docs/sampler.md b/docs/sampler.md index 3d4fbfde8..b194c2aa6 100644 --- a/docs/sampler.md +++ b/docs/sampler.md @@ -12,7 +12,7 @@ git lfs install cd sampler git lfs fetch git lfs checkout -pip install -r requirements.txt +pip install -r ../requirements.txt ``` ## Pre-processing the original trace (mandatory) @@ -91,9 +91,9 @@ monotonic load increase (in terms of resource usage) when sweeping the sample si ```console python3 -m sampler sample -h -usage: sample [-h] -t path -o path [-min integer] [-st integer] [-max integer] [-tr integer] +usage: sample [-h] -t path -orig path -o path [-min integer] [-st integer] [-max integer] [-tr integer] -optional arguments: +options: -h, --help show this help message and exit -t path, --source_trace path Path to trace to draw samples from @@ -113,22 +113,31 @@ optional arguments: ## Reference traces -The reference traces are stored in `data/traces/reference` folder of this repository, as `preprocessed.tar.gz` and -`sampled.tar.gz` files stored in Git LFS. +The reference traces are stored in the `data/traces/reference` folder of this repository, as `preprocessed_150.tar.gz` and +`sampled_150.tar.gz` files stored in Git LFS. + +`preprocessed_150.tar.gz` contains the preprocessed traces for the original Azure trace for day 1, 09:00:00-11:30:00 (150 +minutes total). The 150-minute trace captures approximately half of all functions from the original Azure trace, but is +more suitable for shorter experiments (10 minutes - 2 hours). + +`sampled_150.tar.gz` contains the traces sampled from the preprocessed trace in `preprocessed_150.tar.gz`. Sample sizes are +10-200 functions with step 10, 200-3k with step 50, and 3k-24k with step 1k. 
-`preprocessed.tar.gz` contains the preprocessed traces for the original Azure trace for day 1, 09:00:00-11:30:00 (150 -minutes total). +You can untar the tarballs with the following commands: -`sampled.tar.gz` contains the sampled traces for preprocessed trace from `preprocessed.tar.gz`. Sample sizes are 50-3k -functions with step 50 and 3k-24k with step 1k. +```console +tar -xvzf sampled_150.tar.gz +tar -xvzf preprocessed_150.tar.gz +``` The reference traces were obtained by running the following commands: ```console -python3 -m preprocess -t data/azure/ -o data/reference/preprocessed_150 -s 00:09:00 -dur 150 +python3 -m sampler preprocess -t data/azure/ -o data/traces/reference/preprocessed_150 -s 00:09:00 -dur 150 -python3 -m sample -t data/reference/preprocessed_150 -o data/reference/sampled_150 -min 3000 -st 1000 -max 24000 -tr 16 -python3 -m sample -t data/reference/sampled_150/samples/3000 -o data/reference/sampled_150 -min 50 -st 50 -max 3000 -tr 16 +python3 -m sampler sample -t data/traces/reference/preprocessed_150 -orig data/traces/reference/preprocessed_150 -o data/traces/reference/sampled_150 -min 3000 -st 1000 -max 24000 -tr 16 +python3 -m sampler sample -t data/traces/reference/sampled_150/samples/3000 -orig data/traces/reference/preprocessed_150 -o data/traces/reference/sampled_150 -min 200 -st 50 -max 3000 -tr 16 +python3 -m sampler sample -t data/traces/reference/sampled_150/samples/200 -orig data/traces/reference/preprocessed_150 -o data/traces/reference/sampled_150 -min 10 -st 10 -max 200 -tr 16 ``` ## Tools diff --git a/requirements.txt b/requirements.txt index a22ba4910..4f7553728 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ matplotlib==3.7.2 -numpy==1.26.1 +numpy==1.24.4 pandas==1.3.5 -scipy==1.11.2 +scipy==1.10.1 pytest==7.4.0 cloudpickle==2.2.1 seaborn==0.13.0 diff --git a/sampler/__main__.py b/sampler/__main__.py index afc75afec..59258d2c2 100644 --- a/sampler/__main__.py +++ b/sampler/__main__.py @@ -108,6 +108,7 
@@ def main(): sample_parser.add_argument( '-orig', '--original_trace', + required=True, metavar='path', default=None, help='Path to the Azure (or other original) trace files, required to maximize the derived sample\'s representativity (WD from the original trace)'