Skip to content

Commit

Permalink
Supports startsWith predicates (#327)
Browse files Browse the repository at this point in the history
Co-authored-by: Renato Marroquin <[email protected]>
Co-authored-by: Lior Baber <[email protected]>
Co-authored-by: Sujith Jay Nair <[email protected]>
  • Loading branch information
3 people authored and rdblue committed Aug 12, 2019
1 parent ba1b97e commit 5cfc119
Show file tree
Hide file tree
Showing 12 changed files with 166 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -142,5 +142,10 @@ public <T> Boolean in(BoundReference<T> ref, Literal<T> lit) {
public <T> Boolean notIn(BoundReference<T> ref, Literal<T> lit) {
  // notIn is evaluated as the logical complement of in() for the same reference and literal
  boolean contained = in(ref, lit);
  return !contained;
}

/**
 * Tests whether the string read through {@code ref} from the current struct begins with the
 * literal's value.
 *
 * <p>A null field value never matches: the result is {@code false} instead of a
 * {@link NullPointerException}.
 *
 * @param ref bound reference used to read the value from the struct being evaluated
 * @param lit literal holding the prefix to look for
 * @return true only when the value is non-null and starts with the prefix
 */
@Override
public <T> Boolean startsWith(BoundReference<T> ref, Literal<T> lit) {
  Object value = ref.get(struct);
  // a missing (null) value cannot start with any prefix, so guard before dereferencing
  return value != null && ((String) value).startsWith((String) lit.value());
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ enum Operation {
NOT_IN,
NOT,
AND,
OR;
OR,
STARTS_WITH;

/**
* @return the operation used when this is negated
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ public <T> R notIn(BoundReference<T> ref, Literal<T> lit) {
return null;
}

/**
 * Default handler for {@code STARTS_WITH} predicates.
 *
 * <p>This base implementation always throws; visitors that handle prefix predicates are
 * expected to override it.
 *
 * @throws UnsupportedOperationException always
 */
public <T> R startsWith(BoundReference<T> ref, Literal<T> lit) {
  final String message = "Unsupported operation.";
  throw new UnsupportedOperationException(message);
}

@Override
public <T> R predicate(BoundPredicate<T> pred) {
switch (pred.op()) {
Expand All @@ -120,6 +124,8 @@ public <T> R predicate(BoundPredicate<T> pred) {
return in(pred.ref(), pred.literal());
case NOT_IN:
return notIn(pred.ref(), pred.literal());
case STARTS_WITH:
return startsWith(pred.ref(), pred.literal());
default:
throw new UnsupportedOperationException(
"Unknown operation for predicate: " + pred.op());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ public static <T> UnboundPredicate<T> notEqual(String name, T value) {
return new UnboundPredicate<>(Expression.Operation.NOT_EQ, ref(name), value);
}

/**
 * Creates an unbound {@code STARTS_WITH} predicate for a named column.
 *
 * @param name name of the column the predicate refers to
 * @param value prefix carried as the predicate's literal value
 * @return an unbound predicate with operation {@code STARTS_WITH}
 */
public static UnboundPredicate<String> startsWith(String name, String value) {
  final Expression.Operation op = Expression.Operation.STARTS_WITH;
  return new UnboundPredicate<>(op, ref(name), value);
}

public static <T> UnboundPredicate<T> predicate(Operation op, String name, T value) {
Preconditions.checkArgument(op != Operation.IS_NULL && op != Operation.NOT_NULL,
"Cannot create %s predicate inclusive a value", op);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ public String toString() {
return String.valueOf(ref()) + " == " + literal();
case NOT_EQ:
return String.valueOf(ref()) + " != " + literal();
case STARTS_WITH:
return ref() + " startsWith \"" + literal() + "\"";
// case IN:
// break;
// case NOT_IN:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,12 @@ public <T> Expression notEq(BoundReference<T> ref, Literal<T> lit) {
return (cmp.compare(ref.get(struct), lit.value()) != 0) ? alwaysTrue() : alwaysFalse();
}

/**
 * Resolves a {@code STARTS_WITH} predicate against the current struct.
 *
 * <p>A null value never matches and resolves to {@code alwaysFalse()} instead of raising a
 * {@link NullPointerException}.
 *
 * @param ref bound reference used to read the value from the struct
 * @param lit literal holding the prefix to look for
 * @return {@code alwaysTrue()} when the value starts with the prefix, otherwise
 *     {@code alwaysFalse()}
 */
@Override
public <T> Expression startsWith(BoundReference<T> ref, Literal<T> lit) {
  // prefix matching uses String.startsWith directly, so no comparator is needed here
  Object value = ref.get(struct);
  boolean matches = value != null && ((String) value).startsWith((String) lit.value());
  return matches ? alwaysTrue() : alwaysFalse();
}

@Override
@SuppressWarnings("unchecked")
public <T> Expression predicate(BoundPredicate<T> pred) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ public UnboundPredicate<Integer> project(String name, BoundPredicate<T> predicat
predicate.op(), name, apply(predicate.literal().value()));
// case IN:
// return Expressions.predicate();
case STARTS_WITH:
default:
// comparison predicates can't be projected, notEq can't be projected
// TODO: small ranges can be projected.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ static <S, T> UnboundPredicate<T> truncateArray(
return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary));
case EQ:
return predicate(Expression.Operation.EQ, name, transform.apply(boundary));
case STARTS_WITH:
return predicate(Expression.Operation.STARTS_WITH, name, transform.apply(boundary));
// case IN: // TODO
// return Expressions.predicate(Operation.IN, name, transform.apply(boundary));
default:
Expand Down
31 changes: 23 additions & 8 deletions api/src/main/java/org/apache/iceberg/transforms/Truncate.java
Original file line number Diff line number Diff line change
Expand Up @@ -213,20 +213,35 @@ public boolean canTransform(Type type) {

@Override
public UnboundPredicate<CharSequence> project(String name,
BoundPredicate<CharSequence> pred) {
if (pred.op() == NOT_NULL || pred.op() == IS_NULL) {
return Expressions.predicate(pred.op(), name);
BoundPredicate<CharSequence> predicate) {
switch (predicate.op()) {
case NOT_NULL:
case IS_NULL:
return Expressions.predicate(predicate.op(), name);
case STARTS_WITH:
default:
return ProjectionUtil.truncateArray(name, predicate, this);
}
return ProjectionUtil.truncateArray(name, pred, this);
}

@Override
public UnboundPredicate<CharSequence> projectStrict(String name,
BoundPredicate<CharSequence> pred) {
if (pred.op() == NOT_NULL || pred.op() == IS_NULL) {
return Expressions.predicate(pred.op(), name);
BoundPredicate<CharSequence> predicate) {
switch (predicate.op()) {
case IS_NULL:
case NOT_NULL:
return Expressions.predicate(predicate.op(), name);
case STARTS_WITH:
if (predicate.literal().value().length() < width()) {
return Expressions.predicate(predicate.op(), name, predicate.literal().value());
} else if (predicate.literal().value().length() == width()) {
return Expressions.equal(name, predicate.literal().value());
} else {
return null;
}
default:
return ProjectionUtil.truncateArrayStrict(name, predicate, this);
}
return ProjectionUtil.truncateArrayStrict(name, pred, this);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import static org.apache.iceberg.expressions.Expressions.lessThan;
import static org.apache.iceberg.expressions.Expressions.not;
import static org.apache.iceberg.expressions.Expressions.or;
import static org.apache.iceberg.expressions.Expressions.startsWith;
import static org.apache.iceberg.types.Types.NestedField.required;

public class TestExpressionBinding {
Expand Down Expand Up @@ -131,6 +132,18 @@ public void testNot() {
Assert.assertEquals("Should bind x correctly", 0, child.ref().fieldId());
}

@Test
public void testStartsWith() {
  // struct with a single required string field "s" whose field id is 0
  StructType schema = StructType.of(required(0, "s", Types.StringType.get()));
  Expression unbound = startsWith("s", "abc");

  Expression bound = Binder.bind(schema, unbound);
  TestHelpers.assertAllReferencesBound("StartsWith", bound);

  // binding must produce a bound predicate that keeps the STARTS_WITH operation
  BoundPredicate<?> predicate = TestHelpers.assertAndUnwrap(bound, BoundPredicate.class);
  Assert.assertEquals(
      "Should be right operation", Expression.Operation.STARTS_WITH, predicate.op());
  Assert.assertEquals("Should bind s correctly", 0, predicate.ref().fieldId());
}

@Test
public void testAlwaysTrue() {
Assert.assertEquals("Should not change alwaysTrue",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.transforms;

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.TestHelpers;
import org.apache.iceberg.expressions.Binder;
import org.apache.iceberg.expressions.BoundPredicate;
import org.apache.iceberg.expressions.Evaluator;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.False;
import org.apache.iceberg.expressions.Literal;
import org.apache.iceberg.expressions.Projections;
import org.apache.iceberg.expressions.UnboundPredicate;
import org.apache.iceberg.types.Types;
import org.junit.Assert;
import org.junit.Test;

import static org.apache.iceberg.TestHelpers.assertAndUnwrapUnbound;
import static org.apache.iceberg.expressions.Expressions.startsWith;
import static org.apache.iceberg.types.Types.NestedField.optional;

/**
 * Tests for projecting and evaluating {@code startsWith} predicates on a string column that is
 * partitioned with a truncate transform.
 */
public class TestStartsWith {

  private static final String COLUMN = "someStringCol";
  private static final Schema SCHEMA = new Schema(optional(1, COLUMN, Types.StringType.get()));

  @Test
  public void testTruncateProjections() {
    PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).truncate(COLUMN, 4).build();

    // inclusive projection keeps STARTS_WITH and truncates the prefix to the partition width
    assertProjectionInclusive(spec, startsWith(COLUMN, "ab"), "ab", Expression.Operation.STARTS_WITH);
    assertProjectionInclusive(spec, startsWith(COLUMN, "abab"), "abab", Expression.Operation.STARTS_WITH);
    assertProjectionInclusive(spec, startsWith(COLUMN, "ababab"), "abab", Expression.Operation.STARTS_WITH);

    // strict projection: shorter prefix stays STARTS_WITH, a prefix of exactly the width becomes EQ
    assertProjectionStrict(spec, startsWith(COLUMN, "ab"), "ab", Expression.Operation.STARTS_WITH);
    assertProjectionStrict(spec, startsWith(COLUMN, "abab"), "abab", Expression.Operation.EQ);

    // a prefix longer than the truncate width has no strict projection
    Expression projection = Projections.strict(spec).project(startsWith(COLUMN, "ababab"));
    Assert.assertTrue("Strict projection of a prefix longer than the width should be false",
        projection instanceof False);
  }

  @Test
  @SuppressWarnings("unchecked")
  public void testTruncateString() {
    Truncate<String> trunc = Truncate.get(Types.StringType.get(), 2);
    Expression expr = startsWith(COLUMN, "abcde");
    // binding a startsWith expression on a string column yields a BoundPredicate<String>
    BoundPredicate<String> boundExpr = (BoundPredicate<String>) Binder.bind(SCHEMA.asStruct(), expr, false);

    UnboundPredicate<String> projected = trunc.project(COLUMN, boundExpr);
    Evaluator evaluator = new Evaluator(SCHEMA.asStruct(), projected);

    Assert.assertTrue("startsWith(abcde, truncate(abcde,2)) => true",
        evaluator.eval(TestHelpers.Row.of("abcde")));
    // negative case: a value that does not share the truncated prefix must not match
    Assert.assertFalse("startsWith(xycde, truncate(abcde,2)) => false",
        evaluator.eval(TestHelpers.Row.of("xycde")));
  }

  /** Projects {@code filter} inclusively through {@code spec} and checks the resulting predicate. */
  private void assertProjectionInclusive(PartitionSpec spec, UnboundPredicate<?> filter,
                                         String expectedLiteral, Expression.Operation expectedOp) {
    Expression projection = Projections.inclusive(spec).project(filter);
    assertProjection(spec, expectedLiteral, projection, expectedOp);
  }

  /** Projects {@code filter} strictly through {@code spec} and checks the resulting predicate. */
  private void assertProjectionStrict(PartitionSpec spec, UnboundPredicate<?> filter,
                                      String expectedLiteral, Expression.Operation expectedOp) {
    Expression projection = Projections.strict(spec).project(filter);
    assertProjection(spec, expectedLiteral, projection, expectedOp);
  }

  /**
   * Asserts that {@code projection} is an unbound predicate with the expected operation and
   * that its literal, rendered by the spec's truncate transform, equals {@code expectedLiteral}.
   */
  @SuppressWarnings("unchecked")
  private void assertProjection(PartitionSpec spec, String expectedLiteral, Expression projection,
                                Expression.Operation expectedOp) {
    UnboundPredicate<?> predicate = assertAndUnwrapUnbound(projection);
    Literal<?> literal = predicate.literal();
    // source field id 1 is the only column in SCHEMA and has a single truncate partition field
    Truncate<CharSequence> transform = (Truncate<CharSequence>) spec.getFieldsBySourceId(1).get(0).transform();
    String output = transform.toHumanString((String) literal.value());

    Assert.assertEquals("Projected predicate should have the expected operation",
        expectedOp, predicate.op());
    Assert.assertEquals("Projected literal should match the expected value",
        expectedLiteral, output);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import static org.apache.iceberg.expressions.Expressions.lessThan;
import static org.apache.iceberg.expressions.Expressions.lessThanOrEqual;
import static org.apache.iceberg.expressions.Expressions.notEqual;
import static org.apache.iceberg.expressions.Expressions.startsWith;

public class TestTruncatesResiduals {

Expand Down Expand Up @@ -173,5 +174,10 @@ public void testStringTruncateTransformResiduals() {
assertResidualValue(spec, notEqual("value", "bcd"), "ab", Expression.Operation.TRUE);
assertResidualPredicate(spec, notEqual("value", "bcd"), "bc");
assertResidualValue(spec, notEqual("value", "bcd"), "cd", Expression.Operation.TRUE);

// starts with
assertResidualValue(spec, startsWith("value", "bcd"), "ab", Expression.Operation.FALSE);
assertResidualPredicate(spec, startsWith("value", "bcd"), "bc");
assertResidualValue(spec, startsWith("value", "bcd"), "cd", Expression.Operation.FALSE);
}
}

0 comments on commit 5cfc119

Please sign in to comment.