Skip to content

Commit

Permalink
Merge pull request #1423 from jqnatividad/atoi_simd
Browse files Browse the repository at this point in the history
use `atoi_simd` for fast &[u8] to int conversion
  • Loading branch information
jqnatividad authored Nov 18, 2023
2 parents d252fc4 + d3b5747 commit b5fae59
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 23 deletions.
10 changes: 5 additions & 5 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ panic = "abort"
[dependencies]
ahash = "0.8"
anyhow = { version = "1.0", optional = true }
atoi = "2"
atoi_simd = "0.15"
bincode = "1.3"
byteorder = "1.5"
bytes = "1"
Expand Down Expand Up @@ -150,7 +150,6 @@ polars = { version = "0.35", features = [
"ipc",
"performant",
"cse",
"dtype-categorical"
], optional = true }
pyo3 = { version = "0.20", features = ["auto-initialize"], optional = true }
qsv-dateparser = "0.10"
Expand Down
6 changes: 3 additions & 3 deletions src/cmd/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,19 +332,19 @@ pub fn infer_schema_from_stats(args: &Args, input_filename: &str) -> CliResult<M
type_list.push(Value::String("integer".to_string()));

if let Some(min_str) = stats_record.get(stats_col_index_map["min"]) {
let min = min_str.parse::<i64>().unwrap();
let min = atoi_simd::parse::<i64>(min_str.as_bytes()).unwrap();
field_map.insert("minimum".to_string(), Value::Number(Number::from(min)));
};

if let Some(max_str) = stats_record.get(stats_col_index_map["max"]) {
let max = max_str.parse::<i64>().unwrap();
let max = atoi_simd::parse::<i64>(max_str.as_bytes()).unwrap();
field_map.insert("maximum".to_string(), Value::Number(Number::from(max)));
};

// enum constraint
if let Some(values) = unique_values_map.get(&header_string) {
for value in values {
let int_value = value.parse::<i64>().unwrap();
let int_value = atoi_simd::parse::<i64>(value.as_bytes()).unwrap();
enum_list.push(Value::Number(Number::from(int_value)));
}
}
Expand Down
20 changes: 12 additions & 8 deletions src/cmd/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,15 +308,19 @@ fn next_num<'a, X>(xs: &mut X) -> Option<Number>
where
X: Iterator<Item = &'a [u8]>,
{
xs.next()
.map(|bytes| from_utf8(bytes).unwrap())
.and_then(|s| {
if let Ok(i) = s.parse::<i64>() {
match xs.next() {
Some(bytes) => {
if let Ok(i) = atoi_simd::parse::<i64>(bytes) {
Some(Number::Int(i))
} else if let Ok(f) = s.parse::<f64>() {
Some(Number::Float(f))
} else {
None
// If parsing as i64 failed, try parsing as f64
if let Ok(f) = from_utf8(bytes).unwrap().parse::<f64>() {
Some(Number::Float(f))
} else {
None
}
}
})
},
None => None,
}
}
8 changes: 4 additions & 4 deletions src/cmd/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1494,7 +1494,7 @@ impl FieldType {
|| current_type == FieldType::TInteger
|| current_type == FieldType::TNull
{
if let Ok(int_val) = string.parse::<i64>() {
if let Ok(int_val) = atoi_simd::parse::<i64>(sample) {
// leading zero, it's a string (e.g. zip codes)
if string.as_bytes()[0] == b'0' && int_val != 0 {
return (TString, None);
Expand Down Expand Up @@ -1612,7 +1612,7 @@ impl TypedSum {
// so we don't panic on overflow/underflow, use saturating_add
self.integer = self
.integer
.saturating_add(atoi::atoi::<i64>(sample).unwrap());
.saturating_add(atoi_simd::parse::<i64>(sample).unwrap());
}
},
_ => {},
Expand Down Expand Up @@ -1685,13 +1685,13 @@ impl TypedMinMax {
self.integers.add(n as i64);
},
TInteger => {
let n = atoi::atoi::<i64>(sample).unwrap();
let n = atoi_simd::parse::<i64>(sample).unwrap();
self.integers.add(n);
#[allow(clippy::cast_precision_loss)]
self.floats.add(n as f64);
},
TDate | TDateTime => {
let n = atoi::atoi::<i64>(sample).unwrap();
let n = atoi_simd::parse::<i64>(sample).unwrap();
self.dates.add(n);
},
}
Expand Down
2 changes: 1 addition & 1 deletion src/cmd/validate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -794,7 +794,7 @@ fn to_json_instance(
},
b'i' => {
// integer
if let Ok(int) = value_string.parse::<i64>() {
if let Ok(int) = atoi_simd::parse::<i64>(value_string.as_bytes()) {
json_object_map.insert(key_string, Value::Number(Number::from(int)));
} else {
return fail_clierror!(
Expand Down

0 comments on commit b5fae59

Please sign in to comment.