-
Notifications
You must be signed in to change notification settings - Fork 2.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Introduce startsWith Predicate #327
Changes from 3 commits
5404282
7068a9f
ccf6e5b
52b1162
c45a767
ab39869
a9fb3a5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -113,6 +113,7 @@ public UnboundPredicate<Integer> project(String name, BoundPredicate<T> predicat | |
predicate.op(), name, apply(predicate.literal().value())); | ||
// case IN: | ||
// return Expressions.predicate(); | ||
case STARTS_WITH: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am not sure if a […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Agreed. Transformed values tell us nothing about whether the original predicate is true or not. |
||
default: | ||
// comparison predicates can't be projected, notEq can't be projected | ||
// TODO: small ranges can be projected. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -213,20 +213,35 @@ public boolean canTransform(Type type) { | |
|
||
@Override | ||
public UnboundPredicate<CharSequence> project(String name, | ||
BoundPredicate<CharSequence> pred) { | ||
if (pred.op() == NOT_NULL || pred.op() == IS_NULL) { | ||
return Expressions.predicate(pred.op(), name); | ||
BoundPredicate<CharSequence> predicate) { | ||
switch (predicate.op()) { | ||
case NOT_NULL: | ||
case IS_NULL: | ||
return Expressions.predicate(predicate.op(), name); | ||
case STARTS_WITH: | ||
default: | ||
return ProjectionUtil.truncateArray(name, predicate, this); | ||
} | ||
return ProjectionUtil.truncateArray(name, pred, this); | ||
} | ||
|
||
@Override | ||
public UnboundPredicate<CharSequence> projectStrict(String name, | ||
BoundPredicate<CharSequence> pred) { | ||
if (pred.op() == NOT_NULL || pred.op() == IS_NULL) { | ||
return Expressions.predicate(pred.op(), name); | ||
BoundPredicate<CharSequence> predicate) { | ||
switch (predicate.op()) { | ||
case IS_NULL: | ||
case NOT_NULL: | ||
return Expressions.predicate(predicate.op(), name); | ||
case STARTS_WITH: | ||
if (predicate.literal().value().length() < width()) { | ||
return Expressions.predicate(predicate.op(), name, predicate.literal().value()); | ||
} else if (predicate.literal().value().length() == width()) { | ||
return Expressions.equal(name, predicate.literal().value()); | ||
} else { | ||
return null; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does returning […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Returning […] |
||
} | ||
default: | ||
return ProjectionUtil.truncateArrayStrict(name, predicate, this); | ||
} | ||
return ProjectionUtil.truncateArrayStrict(name, pred, this); | ||
} | ||
|
||
@Override | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.apache.iceberg.transforms; | ||
|
||
import org.apache.iceberg.PartitionSpec; | ||
import org.apache.iceberg.Schema; | ||
import org.apache.iceberg.TestHelpers; | ||
import org.apache.iceberg.expressions.Binder; | ||
import org.apache.iceberg.expressions.BoundPredicate; | ||
import org.apache.iceberg.expressions.Evaluator; | ||
import org.apache.iceberg.expressions.Expression; | ||
import org.apache.iceberg.expressions.False; | ||
import org.apache.iceberg.expressions.Literal; | ||
import org.apache.iceberg.expressions.Projections; | ||
import org.apache.iceberg.expressions.UnboundPredicate; | ||
import org.apache.iceberg.types.Types; | ||
import org.junit.Assert; | ||
import org.junit.Test; | ||
|
||
import static org.apache.iceberg.TestHelpers.assertAndUnwrapUnbound; | ||
import static org.apache.iceberg.expressions.Expressions.startsWith; | ||
import static org.apache.iceberg.types.Types.NestedField.optional; | ||
|
||
public class TestStartsWith { | ||
|
||
private static final String COLUMN = "someStringCol"; | ||
private static final Schema SCHEMA = new Schema(optional(1, COLUMN, Types.StringType.get())); | ||
|
||
@Test | ||
public void assertTruncateProjections() { | ||
sujithjay marked this conversation as resolved.
Show resolved
Hide resolved
|
||
PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).truncate(COLUMN, 4).build(); | ||
|
||
assertProjectionInclusive(spec, startsWith(COLUMN, "ab"), "ab", Expression.Operation.STARTS_WITH); | ||
assertProjectionInclusive(spec, startsWith(COLUMN, "abab"), "abab", Expression.Operation.STARTS_WITH); | ||
assertProjectionInclusive(spec, startsWith(COLUMN, "ababab"), "abab", Expression.Operation.STARTS_WITH); | ||
|
||
assertProjectionStrict(spec, startsWith(COLUMN, "ab"), "ab", Expression.Operation.STARTS_WITH); | ||
assertProjectionStrict(spec, startsWith(COLUMN, "abab"), "abab", Expression.Operation.EQ); | ||
assertProjectionFalse(spec, startsWith(COLUMN, "ababab")); | ||
sujithjay marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
@Test | ||
public void assertTruncateString() { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can some tests be added in the […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Given that, tests in […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @sujithjay, I agree with @moulimukherjee. But you're right that currently an […] The fix is to implement […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. +1 for ^ (implement […] |
||
Truncate<String> trunc = Truncate.get(Types.StringType.get(), 2); | ||
Expression expr = startsWith(COLUMN, "abcde"); | ||
BoundPredicate<String> boundExpr = (BoundPredicate<String>) Binder.bind(SCHEMA.asStruct(), expr, false); | ||
|
||
UnboundPredicate<String> projected = trunc.project(COLUMN, boundExpr); | ||
Evaluator evaluator = new Evaluator(SCHEMA.asStruct(), projected); | ||
|
||
Assert.assertTrue("startsWith(abcde, truncate(abcde,2)) => true", | ||
evaluator.eval(TestHelpers.Row.of("abcde"))); | ||
} | ||
|
||
private void assertProjectionInclusive(PartitionSpec spec, UnboundPredicate<?> filter, | ||
String expectedLiteral, Expression.Operation expectedOp) { | ||
Expression projection = Projections.inclusive(spec).project(filter); | ||
assertProjection(spec, expectedLiteral, projection, expectedOp); | ||
} | ||
|
||
private void assertProjectionStrict(PartitionSpec spec, UnboundPredicate<?> filter, | ||
String expectedLiteral, Expression.Operation expectedOp) { | ||
Expression projection = Projections.strict(spec).project(filter); | ||
assertProjection(spec, expectedLiteral, projection, expectedOp); | ||
} | ||
|
||
private void assertProjection(PartitionSpec spec, String expectedLiteral, Expression projection, | ||
Expression.Operation expectedOp) { | ||
UnboundPredicate<?> predicate = assertAndUnwrapUnbound(projection); | ||
Literal literal = predicate.literal(); | ||
Truncate<CharSequence> transform = (Truncate<CharSequence>) spec.getFieldsBySourceId(1).get(0).transform(); | ||
String output = transform.toHumanString((String) literal.value()); | ||
|
||
Assert.assertEquals(expectedOp, predicate.op()); | ||
Assert.assertEquals(expectedLiteral, output); | ||
} | ||
|
||
private void assertProjectionFalse(PartitionSpec spec, UnboundPredicate<?> filter) { | ||
Expression projection = Projections.strict(spec).project(filter); | ||
Assert.assertTrue(projection instanceof False); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't catch this the first time through, but if the `startsWith` method above is parameterized by `T`, then there shouldn't be a need to cast these to literals and bound references parameterized by `String`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`startsWith` was parameterised with `T`, but I missed removing the casts here.