From 863c8e972a394471cdcf44c362f080b47b7bf896 Mon Sep 17 00:00:00 2001 From: Sujith Jay Nair Date: Sun, 28 Jul 2019 20:30:52 +0200 Subject: [PATCH] Add Projections --- .../org/apache/iceberg/transforms/Bucket.java | 4 ++ .../iceberg/transforms/ProjectionUtil.java | 2 + .../apache/iceberg/transforms/Truncate.java | 17 ++++++-- .../transforms/TestStartsWithProjection.java | 41 +++++++++++++++++++ 4 files changed, 60 insertions(+), 4 deletions(-) create mode 100644 api/src/test/java/org/apache/iceberg/transforms/TestStartsWithProjection.java diff --git a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java index 9448d0233519..9f951e264e36 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java @@ -31,10 +31,13 @@ import java.util.UUID; import org.apache.iceberg.expressions.BoundPredicate; import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.expressions.Literal; import org.apache.iceberg.expressions.UnboundPredicate; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; +import static org.apache.iceberg.expressions.Expression.Operation.STARTS_WITH; +import static org.apache.iceberg.expressions.Expressions.startsWith; import static org.apache.iceberg.types.Type.TypeID; abstract class Bucket implements Transform { @@ -113,6 +116,7 @@ public UnboundPredicate project(String name, BoundPredicate predicat predicate.op(), name, apply(predicate.literal().value())); // case IN: // return Expressions.predicate(); + case STARTS_WITH: default: // comparison predicates can't be projected, notEq can't be projected // TODO: small ranges can be projected. 
diff --git a/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java b/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java
index ef1e0c3591ed..db0a103f5765 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java
@@ -197,6 +197,8 @@ static UnboundPredicate truncateArray(
         return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary));
       case EQ:
         return predicate(Expression.Operation.EQ, name, transform.apply(boundary));
+      case STARTS_WITH:
+        return predicate(Expression.Operation.STARTS_WITH, name, transform.apply(boundary));
       // case IN: // TODO
       // return Expressions.predicate(Operation.IN, name, transform.apply(boundary));
       default:
diff --git a/api/src/main/java/org/apache/iceberg/transforms/Truncate.java b/api/src/main/java/org/apache/iceberg/transforms/Truncate.java
index d65d6462c918..a0913e757a05 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/Truncate.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/Truncate.java
@@ -243,11 +243,20 @@ public boolean canTransform(Type type) {
 
     @Override
     public UnboundPredicate project(String name,
-                                    BoundPredicate pred) {
-      if (pred.op() == NOT_NULL || pred.op() == IS_NULL) {
-        return Expressions.predicate(pred.op(), name);
+                                    BoundPredicate predicate) {
+      switch (predicate.op()) {
+        case NOT_NULL:
+        case IS_NULL:
+          return Expressions.predicate(predicate.op(), name);  // unary predicates carry no literal
+        case STARTS_WITH:
+          if (predicate.literal().value().length() < width()) {  // prefix shorter than width(): keep it as-is
+            return Expressions.predicate(predicate.op(), name, predicate.literal().value());
+          } else {
+            return ProjectionUtil.truncateArray(name, predicate, this);
+          }
+        default:
+          return ProjectionUtil.truncateArray(name, predicate, this);
       }
-      return ProjectionUtil.truncateArray(name, pred, this);
     }
 
     @Override
diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestStartsWithProjection.java 
b/api/src/test/java/org/apache/iceberg/transforms/TestStartsWithProjection.java
new file mode 100644
index 000000000000..46172d36a8fa
--- /dev/null
+++ b/api/src/test/java/org/apache/iceberg/transforms/TestStartsWithProjection.java
@@ -0,0 +1,41 @@
+package org.apache.iceberg.transforms;
+
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.expressions.Expression;
+import org.apache.iceberg.expressions.Literal;
+import org.apache.iceberg.expressions.Projections;
+import org.apache.iceberg.expressions.UnboundPredicate;
+import org.apache.iceberg.types.Types;
+import org.junit.Assert;
+import org.junit.Test;
+
+import static org.apache.iceberg.TestHelpers.assertAndUnwrapUnbound;
+import static org.apache.iceberg.expressions.Expressions.startsWith;
+import static org.apache.iceberg.types.Types.NestedField.optional;
+
+public class TestStartsWithProjection {
+  private static final Schema SCHEMA = new Schema(optional(1, "someStringCol", Types.StringType.get()));
+  @Test
+  public void assertTruncateProjections() {
+    PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).truncate("someStringCol", 4).build();
+    // Width is 4: prefixes of length <= 4 are expected unchanged, a longer prefix is expected cut to 4 chars.
+    assertTruncateProjectionInclusive(spec, startsWith("someStringCol", "ab"), "ab");
+    assertTruncateProjectionInclusive(spec, startsWith("someStringCol", "abab"), "abab");
+    assertTruncateProjectionInclusive(spec, startsWith("someStringCol", "ababab"), "abab");
+
+  }
+
+  private void assertTruncateProjectionInclusive(PartitionSpec spec, UnboundPredicate filter,
+                                                 String expectedLiteral) {
+    Expression projection = Projections.inclusive(spec).project(filter);
+    UnboundPredicate predicate = assertAndUnwrapUnbound(projection);
+    // JUnit's assertEquals takes (expected, actual); the projected operation must stay STARTS_WITH.
+    Assert.assertEquals(Expression.Operation.STARTS_WITH, predicate.op());
+
+    Literal literal = predicate.literal();
+    Truncate transform = (Truncate) spec.getFieldsBySourceId(1).get(0).transform();
+    String output = transform.toHumanString((String) literal.value());
+    Assert.assertEquals(expectedLiteral, output);
+  }
+}