Skip to content

Commit

Permalink
API: Add UnknownType (#12012)
Browse files Browse the repository at this point in the history
* API: Add `UnknownType`

* Make the CI happy

* Thanks Eduard!

* Add tests for the other transforms as well

* Thanks Honah!

* Thanks Russell!

* Thanks Eduard!
  • Loading branch information
Fokko authored Jan 24, 2025
1 parent 80a5ef1 commit a04220a
Show file tree
Hide file tree
Showing 16 changed files with 203 additions and 7 deletions.
5 changes: 4 additions & 1 deletion api/src/main/java/org/apache/iceberg/Schema.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,10 @@ public class Schema implements Serializable {

@VisibleForTesting
static final Map<Type.TypeID, Integer> MIN_FORMAT_VERSIONS =
ImmutableMap.of(Type.TypeID.TIMESTAMP_NANO, 3, Type.TypeID.VARIANT, 3);
ImmutableMap.of(
Type.TypeID.TIMESTAMP_NANO, 3,
Type.TypeID.VARIANT, 3,
Type.TypeID.UNKNOWN, 3);

private final StructType struct;
private final int schemaId;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -533,13 +533,15 @@ private static String sanitize(Type type, Object value, long now, int today) {
return sanitizeTimestamp(DateTimeUtil.nanosToMicros((long) value / 1000), now);
case STRING:
return sanitizeString((CharSequence) value, now, today);
case UNKNOWN:
return "(unknown)";
case BOOLEAN:
case UUID:
case DECIMAL:
case FIXED:
case BINARY:
case VARIANT:
// for boolean, uuid, decimal, fixed, variant, and binary, match the string result
// for boolean, uuid, decimal, fixed, variant, unknown, and binary, match the string result
return sanitizeSimpleString(value.toString());
}
throw new UnsupportedOperationException(
Expand Down
3 changes: 2 additions & 1 deletion api/src/main/java/org/apache/iceberg/types/Type.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ enum TypeID {
STRUCT(StructLike.class),
LIST(List.class),
MAP(Map.class),
VARIANT(Object.class);
VARIANT(Object.class),
UNKNOWN(Object.class);

private final Class<?> javaClass;

Expand Down
3 changes: 3 additions & 0 deletions api/src/main/java/org/apache/iceberg/types/TypeUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,9 @@ private static int estimateSize(Type type) {
case BINARY:
case VARIANT:
return 80;
case UNKNOWN:
// Consider Unknown as null
return 0;
case DECIMAL:
// 12 (header) + (12 + 12 + 4) (BigInteger) + 4 (scale) = 44 bytes
return 44;
Expand Down
19 changes: 19 additions & 0 deletions api/src/main/java/org/apache/iceberg/types/Types.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ private Types() {}
.put(StringType.get().toString(), StringType.get())
.put(UUIDType.get().toString(), UUIDType.get())
.put(BinaryType.get().toString(), BinaryType.get())
.put(UnknownType.get().toString(), UnknownType.get())
.buildOrThrow();

private static final Pattern FIXED = Pattern.compile("fixed\\[\\s*(\\d+)\\s*\\]");
Expand Down Expand Up @@ -447,6 +448,24 @@ public int hashCode() {
}
}

/**
 * Primitive type that reports {@code TypeID.UNKNOWN} as its type ID and renders as the string
 * {@code "unknown"}.
 *
 * <p>Use {@link #get()} to obtain the shared instance.
 */
public static class UnknownType extends PrimitiveType {
  private static final UnknownType INSTANCE = new UnknownType();

  /** Returns the shared {@code UnknownType} instance. */
  public static UnknownType get() {
    return INSTANCE;
  }

  /** Returns the type's string form, {@code "unknown"}. */
  @Override
  public String toString() {
    return "unknown";
  }

  /** Returns {@code TypeID.UNKNOWN}. */
  @Override
  public TypeID typeId() {
    return TypeID.UNKNOWN;
  }
}

public static class DecimalType extends PrimitiveType {
public static DecimalType of(int precision, int scale) {
return new DecimalType(precision, scale);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ public class TestPartitionSpecValidation {
NestedField.required(4, "d", Types.TimestampType.withZone()),
NestedField.required(5, "another_d", Types.TimestampType.withZone()),
NestedField.required(6, "s", Types.StringType.get()),
NestedField.required(7, "v", Types.VariantType.get()));
NestedField.required(7, "v", Types.VariantType.get()),
NestedField.required(8, "u", Types.UnknownType.get()));

@Test
public void testMultipleTimestampPartitions() {
Expand Down Expand Up @@ -325,4 +326,15 @@ public void testVariantUnsupported() {
.isInstanceOf(ValidationException.class)
.hasMessage("Cannot partition by non-primitive source field: variant");
}

@Test
public void testUnknownUnsupported() {
  // field 8 ("u") is the unknown-typed column declared in SCHEMA
  int unknownFieldId = 8;

  // building a bucket partition on the unknown-typed field is expected to fail validation
  assertThatThrownBy(
          () ->
              PartitionSpec.builderFor(SCHEMA)
                  .add(unknownFieldId, 1005, "unknown_partition1", Transforms.bucket(5))
                  .build())
      .isInstanceOf(ValidationException.class)
      .hasMessage("Invalid source type unknown for transform: bucket[5]");
}
}
3 changes: 2 additions & 1 deletion api/src/test/java/org/apache/iceberg/TestSchema.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ public class TestSchema {
ImmutableList.of(
Types.TimestampNanoType.withoutZone(),
Types.TimestampNanoType.withZone(),
Types.VariantType.get());
Types.VariantType.get(),
Types.UnknownType.get());

private static final Schema INITIAL_DEFAULT_SCHEMA =
new Schema(
Expand Down
14 changes: 14 additions & 0 deletions api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,20 @@ public void testVariantUnsupported() {
assertThat(bucket.canTransform(Types.VariantType.get())).isFalse();
}

@Test
public void testUnknownUnsupported() {
  Types.UnknownType unknown = Types.UnknownType.get();
  String expectedMessage = "Cannot bucket by type: unknown";

  // creating a bucket transform directly for the unknown type is expected to fail
  assertThatThrownBy(() -> Transforms.bucket(unknown, 3))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage(expectedMessage);

  // an unbound bucket transform is expected to fail the same way when bound to unknown
  Transform<Object, Integer> unboundBucket = Transforms.bucket(3);
  assertThatThrownBy(() -> unboundBucket.bind(unknown))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage(expectedMessage);

  assertThat(unboundBucket.canTransform(unknown)).isFalse();
}

private byte[] randomBytes(int length) {
byte[] bytes = new byte[length];
testRandom.nextBytes(bytes);
Expand Down
43 changes: 43 additions & 0 deletions api/src/test/java/org/apache/iceberg/transforms/TestDates.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.iceberg.transforms;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import org.apache.iceberg.expressions.Literal;
import org.apache.iceberg.types.Type;
Expand Down Expand Up @@ -267,4 +268,46 @@ public void testDatesReturnType() {
Type dayResultType = day.getResultType(type);
assertThat(dayResultType).isEqualTo(Types.DateType.get());
}

@Test
public void testUnknownUnsupportedYear() {
  Types.UnknownType unknown = Types.UnknownType.get();
  String expectedMessage = "Unsupported type: unknown";

  // creating a year transform directly for the unknown type is expected to fail
  assertThatThrownBy(() -> Transforms.year(unknown))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage(expectedMessage);

  // an unbound year transform is expected to fail the same way when bound to unknown
  Transform<Object, Integer> unboundYear = Transforms.year();
  assertThatThrownBy(() -> unboundYear.bind(unknown))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage(expectedMessage);

  assertThat(unboundYear.canTransform(unknown)).isFalse();
}

@Test
public void testUnknownUnsupportedMonth() {
  Types.UnknownType unknown = Types.UnknownType.get();
  String expectedMessage = "Unsupported type: unknown";

  // creating a month transform directly for the unknown type is expected to fail
  assertThatThrownBy(() -> Transforms.month(unknown))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage(expectedMessage);

  // an unbound month transform is expected to fail the same way when bound to unknown
  Transform<Object, Integer> unboundMonth = Transforms.month();
  assertThatThrownBy(() -> unboundMonth.bind(unknown))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage(expectedMessage);

  assertThat(unboundMonth.canTransform(unknown)).isFalse();
}

@Test
public void testUnknownUnsupportedDay() {
  Types.UnknownType unknown = Types.UnknownType.get();
  String expectedMessage = "Unsupported type: unknown";

  // creating a day transform directly for the unknown type is expected to fail
  assertThatThrownBy(() -> Transforms.day(unknown))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage(expectedMessage);

  // an unbound day transform is expected to fail the same way when bound to unknown
  Transform<Object, Integer> unboundDay = Transforms.day();
  assertThatThrownBy(() -> unboundDay.bind(unknown))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage(expectedMessage);

  assertThat(unboundDay.canTransform(unknown)).isFalse();
}
}
10 changes: 10 additions & 0 deletions api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,16 @@ public void testBigDecimalToHumanString() {
.isEqualTo(decimalString);
}

@Test
public void testUnknownToHumanString() {
  Transform<Object, Object> transform = Transforms.identity();
  Types.UnknownType unknown = Types.UnknownType.get();

  // a null value of the unknown type is expected to render as the literal text "null"
  String humanString = transform.toHumanString(unknown, null);

  assertThat(humanString).as("Should produce \"null\" for null").isEqualTo("null");
}

@Test
public void testVariantUnsupported() {
assertThatThrownBy(() -> Transforms.identity().bind(Types.VariantType.get()))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.iceberg.transforms;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import org.apache.iceberg.expressions.Literal;
import org.apache.iceberg.types.Type;
Expand Down Expand Up @@ -633,4 +634,18 @@ public void testTimestampNanosReturnType() {
Type hourResultType = hour.getResultType(type);
assertThat(hourResultType).isEqualTo(Types.IntegerType.get());
}

@Test
public void testUnknownUnsupported() {
  Types.UnknownType unknown = Types.UnknownType.get();
  String expectedMessage = "Unsupported type: unknown";

  // creating an hour transform directly for the unknown type is expected to fail
  assertThatThrownBy(() -> Transforms.hour(unknown))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage(expectedMessage);

  // an unbound hour transform is expected to fail the same way when bound to unknown
  Transform<Object, Integer> unboundHour = Transforms.hour();
  assertThatThrownBy(() -> unboundHour.bind(unknown))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage(expectedMessage);

  assertThat(unboundHour.canTransform(unknown)).isFalse();
}
}
14 changes: 14 additions & 0 deletions api/src/test/java/org/apache/iceberg/transforms/TestTruncate.java
Original file line number Diff line number Diff line change
Expand Up @@ -116,4 +116,18 @@ public void testVerifiedIllegalWidth() {
.isInstanceOf(IllegalArgumentException.class)
.hasMessage("Invalid truncate width: 0 (must be > 0)");
}

@Test
public void testUnknownUnsupported() {
  Types.UnknownType unknown = Types.UnknownType.get();

  // the typed factory and bind() are expected to fail with different exception types and
  // messages, so each expectation is spelled out separately
  assertThatThrownBy(() -> Transforms.truncate(unknown, 22))
      .isInstanceOf(UnsupportedOperationException.class)
      .hasMessage("Cannot truncate type: unknown");

  Transform<Object, Object> unboundTruncate = Transforms.truncate(22);
  assertThatThrownBy(() -> unboundTruncate.bind(unknown))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage("Cannot bind to unsupported type: unknown");

  assertThat(unboundTruncate.canTransform(unknown)).isFalse();
}
}
37 changes: 37 additions & 0 deletions api/src/test/java/org/apache/iceberg/transforms/TestVoid.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.transforms;

import static org.assertj.core.api.Assertions.assertThat;

import org.apache.iceberg.types.Types;
import org.junit.jupiter.api.Test;

/** Tests for the transform returned by {@code Transforms.alwaysNull()}. */
public class TestVoid {

  @Test
  public void testUnknownToHumanString() {
    Transform<Object, Void> alwaysNull = Transforms.alwaysNull();
    Types.UnknownType unknown = Types.UnknownType.get();

    // a null value of the unknown type is expected to render as the literal text "null"
    assertThat(alwaysNull.toHumanString(unknown, null))
        .as("Should produce \"null\" for null")
        .isEqualTo("null");
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,15 @@ public void testVariant() throws Exception {
.isEqualTo(variant);
}

@Test
public void testUnknown() throws Exception {
  Types.UnknownType original = Types.UnknownType.get();

  // serialize and deserialize the type, then compare the result with what we started from
  Type roundTripped = TestHelpers.roundTripSerialize(original);

  assertThat(roundTripped)
      .as("Unknown serialization should be equal to starting type")
      .isEqualTo(original);
}

@Test
public void testSchema() throws Exception {
Schema schema =
Expand Down
4 changes: 2 additions & 2 deletions api/src/test/java/org/apache/iceberg/types/TestTypes.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public void fromPrimitiveString() {
assertThat(Types.fromPrimitiveString("Decimal(2,3)")).isEqualTo(Types.DecimalType.of(2, 3));

assertThatExceptionOfType(IllegalArgumentException.class)
.isThrownBy(() -> Types.fromPrimitiveString("Unknown"))
.withMessageContaining("Unknown");
.isThrownBy(() -> Types.fromPrimitiveString("abcdefghij"))
.withMessage("Cannot parse type string to primitive: abcdefghij");
}
}
13 changes: 13 additions & 0 deletions core/src/test/java/org/apache/iceberg/TestSortOrder.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.SortOrderUtil;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestTemplate;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.io.TempDir;
Expand Down Expand Up @@ -342,6 +343,18 @@ public void testVariantUnsupported() {
.hasMessage("Unsupported type for identity: variant");
}

@Test
public void testUnknownSupported() {
  int unknownFieldId = 22;
  Types.NestedField unknownField =
      Types.NestedField.optional(unknownFieldId, "u", Types.UnknownType.get());
  Schema schema = new Schema(unknownField);

  // unlike the partition transforms, sorting by an unknown-typed column is expected to build
  SortOrder order = SortOrder.builderFor(schema).asc("u").build();

  assertThat(order.orderId()).isEqualTo(TableMetadata.INITIAL_SORT_ORDER_ID);
  assertThat(order.fields()).hasSize(1);
  assertThat(order.fields().get(0).sourceId()).isEqualTo(unknownFieldId);
}

@TestTemplate
public void testPreservingOrderSortedColumnNames() {
SortOrder order =
Expand Down

0 comments on commit a04220a

Please sign in to comment.