diff --git a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java index 9448d0233519..d088119b210a 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java @@ -113,6 +113,7 @@ public UnboundPredicate project(String name, BoundPredicate predicat predicate.op(), name, apply(predicate.literal().value())); // case IN: // return Expressions.predicate(); + case STARTS_WITH: default: // comparison predicates can't be projected, notEq can't be projected // TODO: small ranges can be projected. diff --git a/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java b/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java index ef1e0c3591ed..db0a103f5765 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java +++ b/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java @@ -197,6 +197,8 @@ static UnboundPredicate truncateArray( return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary)); case EQ: return predicate(Expression.Operation.EQ, name, transform.apply(boundary)); + case STARTS_WITH: + return predicate(Expression.Operation.STARTS_WITH, name, transform.apply(boundary)); // case IN: // TODO // return Expressions.predicate(Operation.IN, name, transform.apply(boundary)); default: diff --git a/api/src/main/java/org/apache/iceberg/transforms/Truncate.java b/api/src/main/java/org/apache/iceberg/transforms/Truncate.java index d65d6462c918..9947ca92e4bd 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Truncate.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Truncate.java @@ -243,17 +243,35 @@ public boolean canTransform(Type type) { @Override public UnboundPredicate project(String name, - BoundPredicate pred) { - if (pred.op() == NOT_NULL || pred.op() == IS_NULL) { - return Expressions.predicate(pred.op(), name); + BoundPredicate predicate) { + switch (predicate.op()) { + case NOT_NULL: + case IS_NULL: + return Expressions.predicate(predicate.op(), name); + case STARTS_WITH: + if (predicate.literal().value().length() <= width()) { + return Expressions.predicate(predicate.op(), name, predicate.literal().value()); + } else { + return ProjectionUtil.truncateArray(name, predicate, this); + } + default: + return ProjectionUtil.truncateArray(name, predicate, this); } - return ProjectionUtil.truncateArray(name, pred, this); } @Override public UnboundPredicate projectStrict(String name, BoundPredicate predicate) { - return null; + switch (predicate.op()) { + case STARTS_WITH: + if (predicate.literal().value().length() <= width()) { + return Expressions.predicate(predicate.op(), name, predicate.literal().value()); + } else { + return null; + } + default: + return null; + } } @Override diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestStartsWith.java b/api/src/test/java/org/apache/iceberg/transforms/TestStartsWith.java new file mode 100644 index 000000000000..1ab4fd932adb --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/transforms/TestStartsWith.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.transforms; + +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.Schema; +import org.apache.iceberg.TestHelpers; +import org.apache.iceberg.expressions.Binder; +import org.apache.iceberg.expressions.BoundPredicate; +import org.apache.iceberg.expressions.Evaluator; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.expressions.Literal; +import org.apache.iceberg.expressions.Projections; +import org.apache.iceberg.expressions.UnboundPredicate; +import org.apache.iceberg.types.Types; +import org.junit.Assert; +import org.junit.Test; + +import static org.apache.iceberg.TestHelpers.assertAndUnwrapUnbound; +import static org.apache.iceberg.expressions.Expressions.startsWith; +import static org.apache.iceberg.types.Types.NestedField.optional; +import static org.apache.iceberg.types.Types.NestedField.required; + +public class TestStartsWith { + + private static final Schema SCHEMA = new Schema(optional(1, "someStringCol", Types.StringType.get())); + + @Test + public void assertTruncateProjections() { + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).truncate("someStringCol", 4).build(); + + assertProjectionInclusive(spec, startsWith("someStringCol", "ab"), "ab"); + assertProjectionInclusive(spec, startsWith("someStringCol", "abab"), "abab"); + assertProjectionInclusive(spec, startsWith("someStringCol", "ababab"), "abab"); + + assertProjectionStrict(spec, startsWith("someStringCol", "ab"), "ab"); + assertProjectionStrict(spec, startsWith("someStringCol", "abab"), "abab"); + } + + @Test + public void assertTruncateString() { + Types.StructType struct = Types.StructType.of(required(0, "s", Types.StringType.get())); + Truncate trunc = Truncate.get(Types.StringType.get(), 2); + Expression expr = startsWith("s", "abcde"); + BoundPredicate boundExpr = (BoundPredicate) Binder.bind(struct, expr, false); + + UnboundPredicate projected = trunc.project("s", boundExpr); + Evaluator evaluator = new Evaluator(struct, projected); + + Assert.assertTrue("startsWith(abcde, truncate(abcde,2)) => true", + evaluator.eval(TestHelpers.Row.of("abcde"))); + } + + private void assertProjectionInclusive(PartitionSpec spec, UnboundPredicate filter, + String expectedLiteral) { + Expression projection = Projections.inclusive(spec).project(filter); + assertProjection(spec, expectedLiteral, projection); + } + + private void assertProjectionStrict(PartitionSpec spec, UnboundPredicate filter, + String expectedLiteral) { + Expression projection = Projections.strict(spec).project(filter); + assertProjection(spec, expectedLiteral, projection); + } + + private void assertProjection(PartitionSpec spec, String expectedLiteral, Expression projection) { + UnboundPredicate predicate = assertAndUnwrapUnbound(projection); + + Assert.assertEquals(predicate.op(), Expression.Operation.STARTS_WITH); + + Literal literal = predicate.literal(); + Truncate transform = (Truncate) spec.getFieldsBySourceId(1).get(0).transform(); + String output = transform.toHumanString((String) literal.value()); + Assert.assertEquals(expectedLiteral, output); + } +} diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTruncate.java b/api/src/test/java/org/apache/iceberg/transforms/TestTruncate.java index bee7d782fc28..3d0c8e3f4a0e 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTruncate.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTruncate.java @@ -21,14 +21,10 @@ import java.math.BigDecimal; import java.nio.ByteBuffer; -import org.apache.iceberg.TestHelpers; -import org.apache.iceberg.expressions.*; import org.apache.iceberg.types.Types; import org.junit.Assert; import org.junit.Test; -import static org.apache.iceberg.expressions.Expressions.startsWith; -import static org.apache.iceberg.types.Types.NestedField.required; public class TestTruncate { @Test public void testTruncateInteger() { @@ -80,20 +76,6 @@ public void testTruncateString() { "abc", trunc.apply("abc")); } - @Test - public void testTruncateStringStartsWith() { - Types.StructType struct = Types.StructType.of(required(0, "s", Types.StringType.get())); - Truncate trunc = Truncate.get(Types.StringType.get(), 2); - Expression expr = startsWith("s", "abcde"); - BoundPredicate boundExpr = (BoundPredicate) Binder.bind(struct, expr, false); - - UnboundPredicate projected = trunc.project("s", boundExpr); - Evaluator evaluator = new Evaluator(struct, projected); - - Assert.assertTrue("startsWith(abcde, truncate(abcde,2)) => true", - evaluator.eval(TestHelpers.Row.of(("abcde")))); - } - @Test public void testTruncateByteBuffer() throws Exception { Truncate trunc = Truncate.get(Types.BinaryType.get(), 4);