Skip to content

Commit

Permalink
Specialize single column primitive group values (#7043)
Browse files Browse the repository at this point in the history
* Specialize primitive group values

* Split module

* RawTable

* Support all primitives

* Add docs

* Update datafusion-cli cargo lock

* Make Cargo.toml order 'just so'

* Review feedback

---------

Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
tustvold and alamb authored Jul 21, 2023
1 parent 368f6e6 commit 77fafb9
Show file tree
Hide file tree
Showing 7 changed files with 487 additions and 232 deletions.
73 changes: 24 additions & 49 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ datafusion-sql = { path = "../sql", version = "27.0.0" }
flate2 = { version = "1.0.24", optional = true }
futures = "0.3"
glob = "0.3.0"
half = { version = "2.1", default-features = false }
hashbrown = { version = "0.14", features = ["raw"] }
indexmap = "2.0.0"
itertools = "0.11"
Expand Down
64 changes: 64 additions & 0 deletions datafusion/core/src/physical_plan/aggregates/group_values/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use arrow_array::{downcast_primitive, ArrayRef};
use arrow_schema::SchemaRef;
use datafusion_common::Result;
use datafusion_physical_expr::EmitTo;

mod primitive;
use primitive::GroupValuesPrimitive;

mod row;
use row::GroupValuesRows;

/// An interning store for group keys
///
/// Implementations must be [`Send`]. The concrete implementation is selected
/// by `new_group_values` based on the schema of the group columns.
pub trait GroupValues: Send {
/// Calculates the `groups` for each input row of `cols`
///
/// `cols` holds the group key columns and `groups` is the output buffer
/// that the implementation fills in.
///
/// NOTE(review): presumably `groups` ends up with one group index per input
/// row, with equal keys mapped to the same index -- confirm against the
/// implementations.
fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> Result<()>;

/// Returns the number of bytes used by this [`GroupValues`]
fn size(&self) -> usize;

/// Returns true if this [`GroupValues`] is empty
fn is_empty(&self) -> bool;

/// The number of values stored in this [`GroupValues`]
fn len(&self) -> usize;

/// Emits the group values
///
/// `emit_to` is an [`EmitTo`] describing what should be emitted.
///
/// NOTE(review): the returned arrays presumably correspond to the group
/// columns, and emitted groups are presumably removed from the store (the
/// method takes `&mut self`) -- confirm against the implementations.
fn emit(&mut self, emit_to: EmitTo) -> Result<Vec<ArrayRef>>;
}

/// Creates the [`GroupValues`] implementation to use for `schema`.
///
/// When `schema` has exactly one field and that field's data type is matched
/// by `downcast_primitive!`, a [`GroupValuesPrimitive`] specialized for the
/// corresponding type is returned. In every other case the function falls
/// back to [`GroupValuesRows`].
///
/// # Errors
///
/// Propagates any error returned by `GroupValuesRows::try_new`.
pub fn new_group_values(schema: SchemaRef) -> Result<Box<dyn GroupValues>> {
    if schema.fields.len() == 1 {
        let data_type = schema.fields[0].data_type();

        // Invoked by `downcast_primitive!` with the matched type; returns
        // straight out of `new_group_values` with the specialized store.
        macro_rules! primitive_group_values {
            ($native:ty, $data_type:ident) => {
                return Ok(Box::new(GroupValuesPrimitive::<$native>::new(
                    $data_type.clone(),
                )))
            };
        }

        downcast_primitive! {
            data_type => (primitive_group_values, data_type),
            // Not handled by the specialization: fall through to the fallback.
            _ => {}
        }
    }

    // Fallback for multi-column keys and for unmatched single-column types.
    let rows = GroupValuesRows::try_new(schema)?;
    Ok(Box::new(rows))
}
Loading

0 comments on commit 77fafb9

Please sign in to comment.