From cfdba897c989d2b8fec6c99be33e16c43ff7109b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jalil=20David=20Salam=C3=A9=20Messina?= Date: Sat, 19 Oct 2024 18:27:49 +0200 Subject: [PATCH 1/4] feat: add rustc-hash feature Changes in preparation of [rust-lang/rust#131936][1]: - Introduce `rustc-hash` dependency and feature. - Modify the `update.sh` script accordingly. [1]: https://github.com/rust-lang/rust/pull/131936 --- Cargo.toml | 10 ++++++++++ README.md | 12 ++++++++++++ update.sh | 4 +--- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index bfbee52..54a03ab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,16 @@ repository = "https://github.com/rust-lang/rustdoc-types" [dependencies] serde = {version="1", features=["derive"]} +rustc-hash = {version="2", optional=true} + +[features] +default = [] + +# Switch the hashmaps used in rustdoc-types to the FxHashMap from rustc-hash. +# +# This might improve performance if you are reading the rustdoc JSON from large +# crates like aws_sdk_ec2 +rustc-hash = ["dep:rustc-hash"] [dev-dependencies] bincode = "1.3.3" diff --git a/README.md b/README.md index 3578e35..ba6a7fc 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,18 @@ let krate: rustdoc_types::Crate = serde_json::from_str(&json_string)?; println!("the index has {} items", krate.index.len()); ``` +For performance sensitive crates we suggest turning on the `rustc-hash` +feature. This switches all data structures from `std::collections::HashMap` to +`rustc-hash::FxHashMap` which improves performance when reading big JSON files +(like `aws_sdk_rs`'s). + +`cargo-semver-checks` benchmarked this change with `aws_sdk_ec2`'s JSON and +[observed a -3% improvement to the runtime][csc benchmarks]. 
The performance +here depends on how much time you spend querying the `HashMap`s, so as always, +measure first c: + +[csc benchmarks]: https://rust-lang.zulipchat.com/#narrow/channel/266220-t-rustdoc/topic/rustc-hash.20and.20performance.20of.20rustdoc-types/near/474855731 + ## Contributing This repo is a reexport of diff --git a/update.sh b/update.sh index 6d10f2c..65612f4 100755 --- a/update.sh +++ b/update.sh @@ -9,9 +9,7 @@ repo="rust" branch="master" curl -# https://raw.githubusercontent.com/${user}/${repo}/${branch}/src/rustdoc-json-types/lib.rs \ - | sed 's/rustc_hash::/std::collections::/g' \ - | sed 's/FxHashMap/HashMap/g' \ - | sed 's/^pub use /use /' \ + | sed '/^pub type FxHashMap.*$/d' \ > src/lib.rs curl -# https://raw.githubusercontent.com/${user}/${repo}/${branch}/src/rustdoc-json-types/tests.rs > src/tests.rs From e0dd9fd7b77126274efbe4b9af6867cfae318945 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jalil=20David=20Salam=C3=A9=20Messina?= Date: Sun, 20 Oct 2024 12:11:38 +0200 Subject: [PATCH 2/4] chore: run ./update.sh --- COMMIT.txt | 2 +- src/lib.rs | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/COMMIT.txt b/COMMIT.txt index 2c2f018..94e66a0 100644 --- a/COMMIT.txt +++ b/COMMIT.txt @@ -1 +1 @@ -2e6f3bd1d32455e535de1d9ee154253c333aec73 +d1fa49b2e66c343210c413b68ed57f150b7b89d8 diff --git a/src/lib.rs b/src/lib.rs index aa22b82..231625b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,11 +3,15 @@ //! These types are the public API exposed through the `--output-format json` flag. The [`Crate`] //! struct is the root of the JSON blob and all other items are contained within. +#[cfg(not(feature = "rustc-hash"))] +use std::collections::HashMap; use std::path::PathBuf; -use std::collections::HashMap; +#[cfg(feature = "rustc-hash")] +use rustc_hash::FxHashMap as HashMap; use serde::{Deserialize, Serialize}; + /// The version of JSON output that this crate represents. 
/// /// This integer is incremented with every breaking change to the API, From 28b4f6ef57b5410c1e9c5126491d83446d3297dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jalil=20David=20Salam=C3=A9=20Messina?= Date: Sun, 20 Oct 2024 13:27:24 +0200 Subject: [PATCH 3/4] feat(ci): also test with the `rustc-hash` feature --- .github/workflows/CI.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 0c0dc90..df2f67b 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -12,8 +12,13 @@ env: jobs: cargo-test: runs-on: ubuntu-latest + strategy: + matrix: + features: + - default + - rustc-hash steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 - - run: cargo test + - run: cargo test --no-default-features --features '${{ matrix.features }}' From 3cbf24197495c07dd2b248406ceeb6a99ae82260 Mon Sep 17 00:00:00 2001 From: Alona Enraght-Moony Date: Sun, 20 Oct 2024 16:43:38 +0100 Subject: [PATCH 4/4] README reword --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ba6a7fc..ac2ba89 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ let krate: rustdoc_types::Crate = serde_json::from_str(&json_string)?; println!("the index has {} items", krate.index.len()); ``` -For performance sensitive crates we suggest turning on the `rustc-hash` +For performance sensitive crates, consider turning on the `rustc-hash` feature. This switches all data structures from `std::collections::HashMap` to `rustc-hash::FxHashMap` which improves performance when reading big JSON files (like `aws_sdk_rs`'s). @@ -21,7 +21,7 @@ feature. This switches all data structures from `std::collections::HashMap` to `cargo-semver-checks` benchmarked this change with `aws_sdk_ec2`'s JSON and [observed a -3% improvement to the runtime][csc benchmarks]. 
The performance here depends on how much time you spend querying the `HashMap`s, so as always, -measure first c: +measure first. [csc benchmarks]: https://rust-lang.zulipchat.com/#narrow/channel/266220-t-rustdoc/topic/rustc-hash.20and.20performance.20of.20rustdoc-types/near/474855731