Skip to content

Commit

Permalink
feat:Support applying parquet bloom filters to StringView columns
Browse files Browse the repository at this point in the history
  • Loading branch information
my-vegetable-has-exploded committed Sep 17, 2024
1 parent a08f923 commit 4ed6b17
Showing 1 changed file with 45 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -264,8 +264,12 @@ impl PruningStatistics for BloomFilterStatistics {
.iter()
.map(|value| {
match value {
ScalarValue::Utf8(Some(v)) => sbbf.check(&v.as_str()),
ScalarValue::Binary(Some(v)) => sbbf.check(v),
ScalarValue::Utf8(Some(v)) | ScalarValue::Utf8View(Some(v)) => {
sbbf.check(&v.as_str())
}
ScalarValue::Binary(Some(v)) | ScalarValue::BinaryView(Some(v)) => {
sbbf.check(v)
}
ScalarValue::FixedSizeBinary(_size, Some(v)) => sbbf.check(v),
ScalarValue::Boolean(Some(v)) => sbbf.check(v),
ScalarValue::Float64(Some(v)) => sbbf.check(v),
Expand Down Expand Up @@ -1219,6 +1223,25 @@ mod tests {
.await
}

/// Bloom-filter pruning with `Utf8View` literals combined under `AND`/`OR`.
///
/// The test is configured with `with_expect_all_pruned`, i.e. it expects the
/// predicate below to prune every row group of the fixture file.
#[tokio::test]
async fn test_row_group_bloom_filter_pruning_predicate_multiple_expr_view() {
    // Builds `"String" = <needle>` where the literal is a `Utf8View` scalar.
    let string_eq_view = |needle: &str| {
        col(r#""String""#).eq(Expr::Literal(ScalarValue::Utf8View(Some(String::from(
            needle,
        )))))
    };

    // Pruning predicate:
    // `'1' = '1' AND (String = "Hello_Not_Exists" OR String = "Hello_Not_Exists2")`
    let always_true = lit("1").eq(lit("1"));
    let either_missing =
        string_eq_view("Hello_Not_Exists").or(string_eq_view("Hello_Not_Exists2"));
    let predicate = always_true.and(either_missing);

    BloomFilterTest::new_data_index_bloom_encoding_stats()
        .with_expect_all_pruned()
        .run(predicate)
        .await
}

#[tokio::test]
async fn test_row_group_bloom_filter_pruning_predicate_sql_in() {
// load parquet file
Expand Down Expand Up @@ -1286,6 +1309,26 @@ mod tests {
.await
}

/// Bloom-filter pruning with three `Utf8View` equality terms joined by `OR`.
///
/// The test is configured with `with_expect_none_pruned`, i.e. it expects the
/// predicate below to leave every row group of the fixture file unpruned.
#[tokio::test]
async fn test_row_group_bloom_filter_pruning_predicate_with_exists_3_values_view() {
    // Builds `"String" = <needle>` where the literal is a `Utf8View` scalar.
    let string_eq_view = |needle: &str| {
        col(r#""String""#).eq(Expr::Literal(ScalarValue::Utf8View(Some(String::from(
            needle,
        )))))
    };

    // Pruning predicate:
    // `(String = "Hello") OR (String = "the quick") OR (String = "are you")`
    let predicate = string_eq_view("Hello")
        .or(string_eq_view("the quick"))
        .or(string_eq_view("are you"));

    BloomFilterTest::new_data_index_bloom_encoding_stats()
        .with_expect_none_pruned()
        .run(predicate)
        .await
}

#[tokio::test]
async fn test_row_group_bloom_filter_pruning_predicate_with_or_not_eq() {
BloomFilterTest::new_data_index_bloom_encoding_stats()
Expand Down

0 comments on commit 4ed6b17

Please sign in to comment.