Skip to content

Commit

Permalink
datashed: improve hashmap performance (#60)
Browse files Browse the repository at this point in the history
Signed-off-by: Nico Wagner <[email protected]>
  • Loading branch information
nwagner84 authored Jul 26, 2024
1 parent 72c22b9 commit 367cafb
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 4 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ rust-version = "1.80.0"
[workspace.dependencies]
bstr = { version = "1.9.1", features = ["unicode"] }
glob = { version = "0.3.1" }
+hashbrown = { version = "0.14.5" }
indicatif = { version = "0.17.8", features = ["rayon"] }
ndarray = { version = "0.15.6" }
ndarray-stats = { version = "0.5.1" }
Expand Down
1 change: 1 addition & 0 deletions crates/datashed/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ dataset = { workspace = true }
dialoguer = { version = "0.11.0" }
flate2 = { version = "1.0.30" }
glob = { workspace = true }
+hashbrown = { workspace = true }
humansize = { version = "2.1.3" }
indicatif = { workspace = true }
lingua = { version = "1.6.2" }
Expand Down
8 changes: 4 additions & 4 deletions crates/datashed/src/commands/vocab.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
-use std::collections::BTreeMap;
use std::fs::File;
use std::io::stdout;
use std::path::PathBuf;

use bstr::ByteSlice;
use clap::Parser;
+use hashbrown::HashMap;
use indicatif::ParallelProgressIterator;
use polars::prelude::*;
use polars::sql::SQLContext;
Expand Down Expand Up @@ -44,7 +44,7 @@ pub(crate) struct Vocab {
predicate: Option<String>,
}

-type VocabMap = BTreeMap<String, u64>;
+type VocabMap = HashMap<String, u64>;

impl Vocab {
pub(crate) fn execute(self) -> DatashedResult<()> {
Expand Down Expand Up @@ -96,8 +96,8 @@ impl Vocab {
acc
});

-let mut tokens = vec![];
-let mut counts = vec![];
+let mut tokens = Vec::with_capacity(vocab.len());
+let mut counts = Vec::with_capacity(vocab.len());

for (token, count) in vocab.into_iter() {
tokens.push(token);
Expand Down

0 comments on commit 367cafb

Please sign in to comment.