Skip to content

Commit

Permalink
Optimize iszero function (3-5x faster) (#12881)
Browse files Browse the repository at this point in the history
* add bench

* Optimize iszero function (3-5x faster)
  • Loading branch information
simonvandel authored Oct 13, 2024
1 parent e7ac843 commit 1582e8d
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 15 deletions.
5 changes: 5 additions & 0 deletions datafusion/functions/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,11 @@ harness = false
name = "make_date"
required-features = ["datetime_expressions"]

[[bench]]
harness = false
name = "iszero"
required-features = ["math_expressions"]

[[bench]]
harness = false
name = "nullif"
Expand Down
46 changes: 46 additions & 0 deletions datafusion/functions/benches/iszero.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

extern crate criterion;

use arrow::{
datatypes::{Float32Type, Float64Type},
util::bench_util::create_primitive_array,
};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_expr::ColumnarValue;
use datafusion_functions::math::iszero;
use std::sync::Arc;

/// Criterion entry point: times the `iszero` UDF over Float32 and Float64
/// array inputs at 1024, 4096 and 8192 elements.
///
/// Every input is produced by `create_primitive_array` with `0.2` as its
/// second argument, and every measured iteration calls `invoke` on the UDF,
/// unwraps the result and passes it through `black_box`.
fn criterion_benchmark(c: &mut Criterion) {
    let udf = iszero();

    for size in [1024, 4096, 8192] {
        // Inputs are built outside the timed closures so that only the
        // `invoke` call is measured.
        let f32_args = vec![ColumnarValue::Array(Arc::new(
            create_primitive_array::<Float32Type>(size, 0.2),
        ))];
        let f32_label = format!("iszero f32 array: {}", size);
        c.bench_function(&f32_label, |bencher| {
            bencher.iter(|| {
                let result = udf.invoke(&f32_args).unwrap();
                black_box(result)
            })
        });

        let f64_args = vec![ColumnarValue::Array(Arc::new(
            create_primitive_array::<Float64Type>(size, 0.2),
        ))];
        let f64_label = format!("iszero f64 array: {}", size);
        c.bench_function(&f64_label, |bencher| {
            bencher.iter(|| {
                let result = udf.invoke(&f64_args).unwrap();
                black_box(result)
            })
        });
    }
}

// Register `criterion_benchmark` under the benchmark group named `benches`.
criterion_group!(benches, criterion_benchmark);
// Hand the `benches` group to Criterion's runner macro; this bench target is
// declared with `harness = false`, so the default test harness is not used.
criterion_main!(benches);
24 changes: 9 additions & 15 deletions datafusion/functions/src/math/iszero.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@
use std::any::Any;
use std::sync::Arc;

use arrow::array::{ArrayRef, BooleanArray, Float32Array, Float64Array};
use arrow::datatypes::DataType;
use arrow::array::{ArrayRef, AsArray, BooleanArray};
use arrow::datatypes::DataType::{Boolean, Float32, Float64};
use arrow::datatypes::{DataType, Float32Type, Float64Type};

use datafusion_common::{exec_err, DataFusionError, Result};
use datafusion_common::{exec_err, Result};
use datafusion_expr::ColumnarValue;
use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
Expand Down Expand Up @@ -77,20 +77,14 @@ impl ScalarUDFImpl for IsZeroFunc {
/// Iszero SQL function
pub fn iszero(args: &[ArrayRef]) -> Result<ArrayRef> {
match args[0].data_type() {
Float64 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
&args[0],
"x",
Float64Array,
BooleanArray,
{ |x: f64| { x == 0_f64 } }
Float64 => Ok(Arc::new(BooleanArray::from_unary(
args[0].as_primitive::<Float64Type>(),
|x| x == 0.0,
)) as ArrayRef),

Float32 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
&args[0],
"x",
Float32Array,
BooleanArray,
{ |x: f32| { x == 0_f32 } }
Float32 => Ok(Arc::new(BooleanArray::from_unary(
args[0].as_primitive::<Float32Type>(),
|x| x == 0.0,
)) as ArrayRef),

other => exec_err!("Unsupported data type {other:?} for function iszero"),
Expand Down

0 comments on commit 1582e8d

Please sign in to comment.