diff --git a/api/src/main/java/org/apache/iceberg/expressions/Evaluator.java b/api/src/main/java/org/apache/iceberg/expressions/Evaluator.java
index e96a5284670c..70dfbd157560 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/Evaluator.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/Evaluator.java
@@ -142,5 +142,12 @@ public Boolean in(BoundReference ref, Literal lit) {
     public Boolean notIn(BoundReference ref, Literal lit) {
       return !in(ref, lit);
     }
+
+    @Override
+    public Boolean startsWith(BoundReference ref, Literal lit) {
+      // a null value (e.g. from an optional column) never starts with any prefix
+      Object value = ref.get(struct);
+      return value != null && value.toString().startsWith(lit.value().toString());
+    }
   }
 }
diff --git a/api/src/main/java/org/apache/iceberg/expressions/Expression.java b/api/src/main/java/org/apache/iceberg/expressions/Expression.java
index 124e61225596..20b1abcd34cb 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/Expression.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/Expression.java
@@ -40,7 +40,8 @@ enum Operation {
     NOT_IN,
     NOT,
     AND,
-    OR;
+    OR,
+    STARTS_WITH;
 
     /**
      * @return the operation used when this is negated
diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionVisitors.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionVisitors.java
index cc45531bad33..fe2b742d6137 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionVisitors.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionVisitors.java
@@ -97,6 +97,10 @@ public R notIn(BoundReference ref, Literal lit) {
       return null;
     }
 
+    public R startsWith(BoundReference ref, Literal lit) {
+      throw new UnsupportedOperationException("startsWith is not supported by this visitor");
+    }
+
     @Override
     public R predicate(BoundPredicate pred) {
       switch (pred.op()) {
@@ -120,6 +124,8 @@ public R predicate(BoundPredicate pred) {
           return in(pred.ref(), pred.literal());
         case NOT_IN:
           return notIn(pred.ref(), pred.literal());
+        case STARTS_WITH:
+          return startsWith(pred.ref(), pred.literal());
         default:
           throw new UnsupportedOperationException(
               "Unknown operation for predicate: " + pred.op());
diff --git a/api/src/main/java/org/apache/iceberg/expressions/Expressions.java b/api/src/main/java/org/apache/iceberg/expressions/Expressions.java
index d280f89cf3cd..b99fffee83aa 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/Expressions.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/Expressions.java
@@ -105,6 +105,17 @@ public static UnboundPredicate notEqual(String name, T value) {
     return new UnboundPredicate<>(Expression.Operation.NOT_EQ, ref(name), value);
   }
 
+  /**
+   * Creates an unbound predicate that matches values starting with the given prefix.
+   *
+   * @param name name of the referenced field
+   * @param value prefix that matching values must start with
+   * @return an unbound {@code STARTS_WITH} predicate on {@code name}
+   */
+  public static UnboundPredicate startsWith(String name, String value) {
+    return new UnboundPredicate<>(Expression.Operation.STARTS_WITH, ref(name), value);
+  }
+
   public static UnboundPredicate predicate(Operation op, String name, T value) {
     Preconditions.checkArgument(op != Operation.IS_NULL && op != Operation.NOT_NULL,
         "Cannot create %s predicate inclusive a value", op);
diff --git a/api/src/main/java/org/apache/iceberg/expressions/Predicate.java b/api/src/main/java/org/apache/iceberg/expressions/Predicate.java
index 12923ef2503f..bc368d16af28 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/Predicate.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/Predicate.java
@@ -62,6 +62,8 @@ public String toString() {
         return String.valueOf(ref()) + " == " + literal();
       case NOT_EQ:
         return String.valueOf(ref()) + " != " + literal();
+      case STARTS_WITH:
+        return String.valueOf(ref()) + " startsWith \"" + literal() + "\"";
 //      case IN:
 //        break;
 //      case NOT_IN:
diff --git a/api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java b/api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java
index dd3a0b8da8a0..f62c1490e24f 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java
@@ -194,6 +194,14 @@ public Expression notEq(BoundReference ref, Literal lit) {
       return (cmp.compare(ref.get(struct), lit.value()) != 0) ? alwaysTrue() : alwaysFalse();
     }
 
+    @Override
+    public Expression startsWith(BoundReference ref, Literal lit) {
+      // a null value never starts with any prefix, so the residual is false
+      Object value = ref.get(struct);
+      boolean matches = value != null && value.toString().startsWith(lit.value().toString());
+      return matches ? alwaysTrue() : alwaysFalse();
+    }
+
     @Override
     @SuppressWarnings("unchecked")
     public Expression predicate(BoundPredicate pred) {
diff --git a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java
index 9448d0233519..d088119b210a 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java
@@ -113,6 +113,7 @@ public UnboundPredicate project(String name, BoundPredicate predicat
             predicate.op(), name, apply(predicate.literal().value()));
 //      case IN:
 //        return Expressions.predicate();
+      case STARTS_WITH:
       default:
         // comparison predicates can't be projected, notEq can't be projected
         // TODO: small ranges can be projected.
diff --git a/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java b/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java
index 84f604c38ec8..d7b4d393e24b 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java
@@ -189,6 +189,8 @@ static UnboundPredicate truncateArray(
         return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary));
       case EQ:
         return predicate(Expression.Operation.EQ, name, transform.apply(boundary));
+      case STARTS_WITH:
+        return predicate(Expression.Operation.STARTS_WITH, name, transform.apply(boundary));
 //        case IN: // TODO
 //          return Expressions.predicate(Operation.IN, name, transform.apply(boundary));
       default:
diff --git a/api/src/main/java/org/apache/iceberg/transforms/Truncate.java b/api/src/main/java/org/apache/iceberg/transforms/Truncate.java
index d7d55ddfe1b7..148f205222d5 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/Truncate.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/Truncate.java
@@ -213,20 +213,38 @@ public boolean canTransform(Type type) {
 
     @Override
     public UnboundPredicate project(String name,
-                                    BoundPredicate pred) {
-      if (pred.op() == NOT_NULL || pred.op() == IS_NULL) {
-        return Expressions.predicate(pred.op(), name);
+                                    BoundPredicate predicate) {
+      switch (predicate.op()) {
+        case NOT_NULL:
+        case IS_NULL:
+          return Expressions.predicate(predicate.op(), name);
+        case STARTS_WITH:
+        default:
+          return ProjectionUtil.truncateArray(name, predicate, this);
       }
-      return ProjectionUtil.truncateArray(name, pred, this);
     }
 
     @Override
     public UnboundPredicate projectStrict(String name,
-                                          BoundPredicate pred) {
-      if (pred.op() == NOT_NULL || pred.op() == IS_NULL) {
-        return Expressions.predicate(pred.op(), name);
+                                          BoundPredicate predicate) {
+      switch (predicate.op()) {
+        case IS_NULL:
+        case NOT_NULL:
+          return Expressions.predicate(predicate.op(), name);
+        case STARTS_WITH:
+          if (predicate.literal().value().length() < width()) {
+            // prefix is shorter than the truncation width: truncated values still contain it
+            return Expressions.predicate(predicate.op(), name, predicate.literal().value());
+          } else if (predicate.literal().value().length() == width()) {
+            // prefix fills the whole truncated value: matching is the same as equality
+            return Expressions.equal(name, predicate.literal().value());
+          } else {
+            // prefix is longer than the truncated value, which cannot prove a match
+            return null;
+          }
+        default:
+          return ProjectionUtil.truncateArrayStrict(name, predicate, this);
       }
-      return ProjectionUtil.truncateArrayStrict(name, pred, this);
     }
 
     @Override
diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java
index 3e846999a567..fc4fc394c3aa 100644
--- a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java
+++ b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java
@@ -34,6 +34,7 @@
 import static org.apache.iceberg.expressions.Expressions.lessThan;
 import static org.apache.iceberg.expressions.Expressions.not;
 import static org.apache.iceberg.expressions.Expressions.or;
+import static org.apache.iceberg.expressions.Expressions.startsWith;
 import static org.apache.iceberg.types.Types.NestedField.required;
 
 public class TestExpressionBinding {
@@ -131,6 +132,18 @@ public void testNot() {
     Assert.assertEquals("Should bind x correctly", 0, child.ref().fieldId());
   }
 
+  @Test
+  public void testStartsWith() {
+    StructType struct = StructType.of(required(0, "s", Types.StringType.get()));
+    Expression expr = startsWith("s", "abc");
+    Expression boundExpr = Binder.bind(struct, expr);
+    TestHelpers.assertAllReferencesBound("StartsWith", boundExpr);
+    // make sure the expression is a StartsWith
+    BoundPredicate pred = TestHelpers.assertAndUnwrap(boundExpr, BoundPredicate.class);
+    Assert.assertEquals("Should be right operation", Expression.Operation.STARTS_WITH, pred.op());
+    Assert.assertEquals("Should bind s correctly", 0, pred.ref().fieldId());
+  }
+
   @Test
   public void testAlwaysTrue() {
     Assert.assertEquals("Should not change alwaysTrue",
diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestStartsWith.java b/api/src/test/java/org/apache/iceberg/transforms/TestStartsWith.java
new file mode 100644
index 000000000000..38aa9914fc92
--- /dev/null
+++ b/api/src/test/java/org/apache/iceberg/transforms/TestStartsWith.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.transforms;
+
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.TestHelpers;
+import org.apache.iceberg.expressions.Binder;
+import org.apache.iceberg.expressions.BoundPredicate;
+import org.apache.iceberg.expressions.Evaluator;
+import org.apache.iceberg.expressions.Expression;
+import org.apache.iceberg.expressions.False;
+import org.apache.iceberg.expressions.Literal;
+import org.apache.iceberg.expressions.Projections;
+import org.apache.iceberg.expressions.UnboundPredicate;
+import org.apache.iceberg.types.Types;
+import org.junit.Assert;
+import org.junit.Test;
+
+import static org.apache.iceberg.TestHelpers.assertAndUnwrapUnbound;
+import static org.apache.iceberg.expressions.Expressions.startsWith;
+import static org.apache.iceberg.types.Types.NestedField.optional;
+
+public class TestStartsWith {
+
+  private static final String COLUMN = "someStringCol";
+  private static final Schema SCHEMA = new Schema(optional(1, COLUMN, Types.StringType.get()));
+
+  @Test
+  public void testTruncateProjections() {
+    PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).truncate(COLUMN, 4).build();
+
+    assertProjectionInclusive(spec, startsWith(COLUMN, "ab"), "ab", Expression.Operation.STARTS_WITH);
+    assertProjectionInclusive(spec, startsWith(COLUMN, "abab"), "abab", Expression.Operation.STARTS_WITH);
+    assertProjectionInclusive(spec, startsWith(COLUMN, "ababab"), "abab", Expression.Operation.STARTS_WITH);
+
+    assertProjectionStrict(spec, startsWith(COLUMN, "ab"), "ab", Expression.Operation.STARTS_WITH);
+    assertProjectionStrict(spec, startsWith(COLUMN, "abab"), "abab", Expression.Operation.EQ);
+
+    Expression projection = Projections.strict(spec).project(startsWith(COLUMN, "ababab"));
+    Assert.assertTrue(projection instanceof False);
+  }
+
+  @Test
+  public void testTruncateString() {
+    Truncate trunc = Truncate.get(Types.StringType.get(), 2);
+    Expression expr = startsWith(COLUMN, "abcde");
+    BoundPredicate boundExpr = (BoundPredicate) Binder.bind(SCHEMA.asStruct(), expr, false);
+
+    UnboundPredicate projected = trunc.project(COLUMN, boundExpr);
+    Evaluator evaluator = new Evaluator(SCHEMA.asStruct(), projected);
+
+    Assert.assertTrue("startsWith(abcde, truncate(abcde,2)) => true",
+        evaluator.eval(TestHelpers.Row.of("abcde")));
+  }
+
+  private void assertProjectionInclusive(PartitionSpec spec, UnboundPredicate filter,
+                                         String expectedLiteral, Expression.Operation expectedOp) {
+    Expression projection = Projections.inclusive(spec).project(filter);
+    assertProjection(spec, expectedLiteral, projection, expectedOp);
+  }
+
+  private void assertProjectionStrict(PartitionSpec spec, UnboundPredicate filter,
+                                      String expectedLiteral, Expression.Operation expectedOp) {
+    Expression projection = Projections.strict(spec).project(filter);
+    assertProjection(spec, expectedLiteral, projection, expectedOp);
+  }
+
+  private void assertProjection(PartitionSpec spec, String expectedLiteral, Expression projection,
+                                Expression.Operation expectedOp) {
+    UnboundPredicate predicate = assertAndUnwrapUnbound(projection);
+    Literal literal = predicate.literal();
+    Truncate transform = (Truncate) spec.getFieldsBySourceId(1).get(0).transform();
+    String output = transform.toHumanString((String) literal.value());
+
+    Assert.assertEquals(expectedOp, predicate.op());
+    Assert.assertEquals(expectedLiteral, output);
+  }
+}
diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTruncatesResiduals.java b/api/src/test/java/org/apache/iceberg/transforms/TestTruncatesResiduals.java
index 1ed1f4cad9c0..f2160d5a0add 100644
--- a/api/src/test/java/org/apache/iceberg/transforms/TestTruncatesResiduals.java
+++ b/api/src/test/java/org/apache/iceberg/transforms/TestTruncatesResiduals.java
@@ -36,6 +36,7 @@
 import static org.apache.iceberg.expressions.Expressions.lessThan;
 import static org.apache.iceberg.expressions.Expressions.lessThanOrEqual;
 import static org.apache.iceberg.expressions.Expressions.notEqual;
+import static org.apache.iceberg.expressions.Expressions.startsWith;
 
 public class TestTruncatesResiduals {
 
@@ -173,5 +174,10 @@ public void testStringTruncateTransformResiduals() {
     assertResidualValue(spec, notEqual("value", "bcd"), "ab", Expression.Operation.TRUE);
     assertResidualPredicate(spec, notEqual("value", "bcd"), "bc");
     assertResidualValue(spec, notEqual("value", "bcd"), "cd", Expression.Operation.TRUE);
+
+    // starts with
+    assertResidualValue(spec, startsWith("value", "bcd"), "ab", Expression.Operation.FALSE);
+    assertResidualPredicate(spec, startsWith("value", "bcd"), "bc");
+    assertResidualValue(spec, startsWith("value", "bcd"), "cd", Expression.Operation.FALSE);
   }
 }