From 418b6519184f068ee78ba67cff97e2819ba9f478 Mon Sep 17 00:00:00 2001 From: Mark Juggurnauth-Thomas Date: Thu, 28 Nov 2024 04:21:13 -0800 Subject: [PATCH] packer: add --tuning-info-scuba-log-file argument Summary: Same as D66544989, but for the `--tuning-info-*` argument to the packer. We also add an alias so we can standardize on `scuba-dataset` for the dataset argument. Reviewed By: clara-9 Differential Revision: D66546324 fbshipit-source-id: 41cb1cccf97515ecb388da5ab65669edbb5cbfb1 --- eden/mononoke/cmds/packer/main.rs | 27 +++++++++++-------- .../test-packer-tuning-debug-info.t | 2 +- eden/mononoke/tests/integration/test-packer.t | 12 ++++----- .../test-new-walker-pack-info-multiplex.t | 4 +-- .../walker/test-new-walker-pack-info.t | 2 +- 5 files changed, 26 insertions(+), 21 deletions(-) diff --git a/eden/mononoke/cmds/packer/main.rs b/eden/mononoke/cmds/packer/main.rs index 9f36ea4f6f4d1..b435e39985abf 100644 --- a/eden/mononoke/cmds/packer/main.rs +++ b/eden/mononoke/cmds/packer/main.rs @@ -11,6 +11,7 @@ use std::io; use std::io::prelude::*; use std::io::BufReader; use std::path::Path; +use std::path::PathBuf; use std::time::Instant; use anyhow::bail; @@ -64,14 +65,13 @@ struct MononokePackerArgs { #[clap(long, help = "If true, print the progress of the packing")] print_progress: bool, - /// The scuba table that contains the tuning debug information, - /// for example, the time used for finding the best packing strategy - #[clap( - long, - default_value_t = String::from("file:///tmp/packer_tuning_log.json"), - help = "The scuba table that contains the tuning debug information" - )] - tuning_info_scuba_table: String, + /// Scuba table to log tuning information to + #[clap(long, alias = "tuning-info-scuba-dataset")] + tuning_info_scuba_table: Option<String>, + + /// File to log tuning information to + #[clap(long)] + tuning_info_scuba_log_file: Option<PathBuf>, } const PACK_PREFIX: &str = "multiblob-"; @@ -145,7 +145,6 @@ fn main(fb: FacebookInit) -> Result<()> { let 
max_parallelism = args.scheduled_max; let keys_dir = args.keys_dir; let print_progress = args.print_progress; - let tuning_info_scuba_table = args.tuning_info_scuba_table; let env = app.environment(); let logger = app.logger(); @@ -161,8 +160,14 @@ fn main(fb: FacebookInit) -> Result<()> { .collect::<Result<Vec<_>, io::Error>>()?; keys_file_entries.shuffle(&mut thread_rng()); - // prepare the tuning info scuba table - let tuning_info_scuba_builder = MononokeScubaSampleBuilder::new(fb, &tuning_info_scuba_table)?; + let mut tuning_info_scuba_builder = match args.tuning_info_scuba_table { + Some(table) => MononokeScubaSampleBuilder::new(fb, &table)?, + None => MononokeScubaSampleBuilder::with_discard(), + }; + + if let Some(file) = args.tuning_info_scuba_log_file { + tuning_info_scuba_builder = tuning_info_scuba_builder.with_log_file(file)?; + } let total_file_count = keys_file_entries.len(); for (cur, entry) in keys_file_entries.iter().enumerate() { diff --git a/eden/mononoke/tests/integration/test-packer-tuning-debug-info.t b/eden/mononoke/tests/integration/test-packer-tuning-debug-info.t index 7437dd9996ebe..1bd06d2cf1453 100644 --- a/eden/mononoke/tests/integration/test-packer-tuning-debug-info.t +++ b/eden/mononoke/tests/integration/test-packer-tuning-debug-info.t @@ -47,7 +47,7 @@ $ echo 'repo0000.content.blake2.7f4c8284eea7351488400d6fdf82e1c262a81e20d4abd8ee469841d19b60c94a' >> $TESTTMP/pack_key_files4/reporepo.store0.part0.keys.txt # Pack content into a pack - $ packer --zstd-level 19 --scuba-log-file packed.json --keys-dir $TESTTMP/pack_key_files4/ --print-progress --tuning-info-scuba-table "file://${TESTTMP}/tuning_scuba.json" 2>&1 | strip_glog + $ packer --zstd-level 19 --scuba-log-file packed.json --keys-dir $TESTTMP/pack_key_files4/ --print-progress --tuning-info-scuba-log-file "${TESTTMP}/tuning_scuba.json" 2>&1 | strip_glog File *reporepo.store0.part0.keys.txt, which has 3 lines (glob) Progress: 100.000% processing took * (glob) diff --git 
a/eden/mononoke/tests/integration/test-packer.t b/eden/mononoke/tests/integration/test-packer.t index a7a8dc1c65ac3..cc961c80ca9ad 100644 --- a/eden/mononoke/tests/integration/test-packer.t +++ b/eden/mononoke/tests/integration/test-packer.t @@ -49,9 +49,9 @@ $ echo 'repo0000.content.blake2.7f4c8284eea7351488400d6fdf82e1c262a81e20d4abd8ee469841d19b60c94a' >> $TESTTMP/pack_key_files3/reporepo.store0.part0.keys.txt # Pack content individually, to show recompression effect - $ packer --zstd-level 10 --scuba-log-file pack-individually.json --keys-dir $TESTTMP/pack_key_files1/ --tuning-info-scuba-table "file://${TESTTMP}/tuning_scuba.json" - $ packer --zstd-level 10 --scuba-log-file pack-individually.json --keys-dir $TESTTMP/pack_key_files2/ --tuning-info-scuba-table "file://${TESTTMP}/tuning_scuba.json" - $ packer --zstd-level 10 --scuba-log-file pack-individually.json --keys-dir $TESTTMP/pack_key_files3/ --tuning-info-scuba-table "file://${TESTTMP}/tuning_scuba.json" + $ packer --zstd-level 10 --scuba-log-file pack-individually.json --keys-dir $TESTTMP/pack_key_files1/ --tuning-info-scuba-log-file "${TESTTMP}/tuning_scuba.json" + $ packer --zstd-level 10 --scuba-log-file pack-individually.json --keys-dir $TESTTMP/pack_key_files2/ --tuning-info-scuba-log-file "${TESTTMP}/tuning_scuba.json" + $ packer --zstd-level 10 --scuba-log-file pack-individually.json --keys-dir $TESTTMP/pack_key_files3/ --tuning-info-scuba-log-file "${TESTTMP}/tuning_scuba.json" # Check logging for individually packed keys (last 3 digits of the compressed size are matched by glob because they can change on zstd crate updates) $ jq -r '.int * .normal | [ .blobstore_id, .blobstore_key, .uncompressed_size, .compressed_size ] | @csv' < pack-individually.json | sort | uniq @@ -78,7 +78,7 @@ $ echo 'repo0000.content.blake2.7f4c8284eea7351488400d6fdf82e1c262a81e20d4abd8ee469841d19b60c94a' >> $TESTTMP/pack_key_files4/reporepo.store0.part0.keys.txt # Pack content into a pack - $ packer --zstd-level 19 
--scuba-log-file packed.json --keys-dir $TESTTMP/pack_key_files4/ --tuning-info-scuba-table "file://${TESTTMP}/tuning_scuba.json" + $ packer --zstd-level 19 --scuba-log-file packed.json --keys-dir $TESTTMP/pack_key_files4/ --tuning-info-scuba-log-file "${TESTTMP}/tuning_scuba.json" # Check logging for packed keys (last 3 digits of the compressed size are matched by glob because they can change on zstd crate updates) $ jq -r '.int * .normal | [ .blobstore_id, .blobstore_key, .pack_key, .uncompressed_size, .compressed_size ] | @csv' < packed.json | sort | uniq @@ -125,7 +125,7 @@ $ echo 'repo0000.alias.sha256.85b856bc2313fcddec8464984ab2d384f61625890ee19e4f909dd80ac36e8fd7' >> $TESTTMP/pack_key_files_aliases/reporepo.store0.part0.keys.txt $ echo 'repo0000.alias.sha256.9b798d4eb3901972c1311a3c6a21480e3f29c8c64cd6bbb81a977ecab56452e3' >> $TESTTMP/pack_key_files_aliases/reporepo.store0.part0.keys.txt - $ packer --zstd-level 19 --keys-dir $TESTTMP/pack_key_files_aliases/ --tuning-info-scuba-table "file://${TESTTMP}/tuning_scuba.json" + $ packer --zstd-level 19 --keys-dir $TESTTMP/pack_key_files_aliases/ --tuning-info-scuba-log-file "${TESTTMP}/tuning_scuba.json" # Show that they're not packed (hardlink count of 1) $ stat -c '%s %h %N' $TESTTMP/blobstore/0/blobs/blob-repo0000.alias.* | sort -n @@ -152,7 +152,7 @@ $ echo '' >> $TESTTMP/pack_key_files5/reporepo.store0.part0.keys.txt $ echo 'repo0000.content.blake2.7f4c8284eea7351488400d6fdf82e1c262a81e20d4abd8ee469841d19b60c94a' >> $TESTTMP/pack_key_files5/reporepo.store0.part0.keys.txt - $ packer --zstd-level 19 --keys-dir $TESTTMP/pack_key_files5/ --tuning-info-scuba-table "file://${TESTTMP}/tuning_scuba.json" + $ packer --zstd-level 19 --keys-dir $TESTTMP/pack_key_files5/ --tuning-info-scuba-log-file "${TESTTMP}/tuning_scuba.json" $ stat -c '%s %h %N' $TESTTMP/blobstore/0/blobs/blob-repo0000.content.blake2.* | sort -n * 1 
'$TESTTMP/blobstore/0/blobs/blob-repo0000.content.blake2.7f4c8284eea7351488400d6fdf82e1c262a81e20d4abd8ee469841d19b60c94a.pack' (glob) * 2 '$TESTTMP/blobstore/0/blobs/blob-repo0000.content.blake2.4caa3d2f7430890df6f5deb3b652fcc88769e3323c0b7676e9771d172a521bbd.pack' (glob) diff --git a/eden/mononoke/tests/integration/walker/test-new-walker-pack-info-multiplex.t b/eden/mononoke/tests/integration/walker/test-new-walker-pack-info-multiplex.t index 13116cb056db6..baee9b4376251 100644 --- a/eden/mononoke/tests/integration/walker/test-new-walker-pack-info-multiplex.t +++ b/eden/mononoke/tests/integration/walker/test-new-walker-pack-info-multiplex.t @@ -30,8 +30,8 @@ Set up the key file for packing $ (cd blobstore/0/blobs; ls) | sed -e 's/^blob-//' -e 's/.pack$//' >> $TESTTMP/pack_key_files_1/reporepo.store1.part1.keys.txt Pack the blobs in the two packed stores differently - $ packer --zstd-level=3 --keys-dir $TESTTMP/pack_key_files_0/ --tuning-info-scuba-table "file://${TESTTMP}/tuning_scuba.json" - $ packer --zstd-level=19 --keys-dir $TESTTMP/pack_key_files_1/ --tuning-info-scuba-table "file://${TESTTMP}/tuning_scuba.json" + $ packer --zstd-level=3 --keys-dir $TESTTMP/pack_key_files_0/ --tuning-info-scuba-log-file "${TESTTMP}/tuning_scuba.json" + $ packer --zstd-level=19 --keys-dir $TESTTMP/pack_key_files_1/ --tuning-info-scuba-log-file "${TESTTMP}/tuning_scuba.json" Run a scrub, need a scrub action to put ScrubBlobstore in the stack, which is necessary to make sure all the inner stores of the multiplex are read $ mononoke_walker -l loaded --blobstore-scrub-action=ReportOnly scrub -q -I deep -i bonsai -i FileContent -b master_bookmark -a all --pack-log-scuba-file pack-info-packed.json 2>&1 | strip_glog diff --git a/eden/mononoke/tests/integration/walker/test-new-walker-pack-info.t b/eden/mononoke/tests/integration/walker/test-new-walker-pack-info.t index ecedfecdcf864..32aec9b0d290a 100644 --- a/eden/mononoke/tests/integration/walker/test-new-walker-pack-info.t +++ 
b/eden/mononoke/tests/integration/walker/test-new-walker-pack-info.t @@ -41,7 +41,7 @@ Check logged pack info. Commit time is forced to zero in tests, hence mtime is 0 $ (cd blobstore/blobs; ls) | sed -e 's/^blob-//' -e 's/.pack$//' >> $TESTTMP/pack_key_files/reporepo.store.part0.keys.txt Now pack the blobs - $ packer --zstd-level=3 --keys-dir $TESTTMP/pack_key_files/ --tuning-info-scuba-table "file://${TESTTMP}/tuning_scuba.json" + $ packer --zstd-level=3 --keys-dir $TESTTMP/pack_key_files/ --tuning-info-scuba-log-file "${TESTTMP}/tuning_scuba.json" Run a scrub again now the storage is packed $ mononoke_walker -l loaded scrub -q -I deep -i bonsai -i FileContent -p Changeset --checkpoint-name=bonsai_packed --checkpoint-path=test_sqlite -a all --pack-log-scuba-file pack-info-packed.json 2>&1 | strip_glog