diff --git a/api/src/main/java/org/apache/iceberg/Schema.java b/api/src/main/java/org/apache/iceberg/Schema.java index bd07e9798e9b..07ed44b65cf7 100644 --- a/api/src/main/java/org/apache/iceberg/Schema.java +++ b/api/src/main/java/org/apache/iceberg/Schema.java @@ -60,7 +60,10 @@ public class Schema implements Serializable { @VisibleForTesting static final Map MIN_FORMAT_VERSIONS = - ImmutableMap.of(Type.TypeID.TIMESTAMP_NANO, 3, Type.TypeID.VARIANT, 3); + ImmutableMap.of( + Type.TypeID.TIMESTAMP_NANO, 3, + Type.TypeID.VARIANT, 3, + Type.TypeID.UNKNOWN, 3); private final StructType struct; private final int schemaId; diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java index 028b07827b54..02f3880dd96a 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java +++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java @@ -533,13 +533,15 @@ private static String sanitize(Type type, Object value, long now, int today) { return sanitizeTimestamp(DateTimeUtil.nanosToMicros((long) value / 1000), now); case STRING: return sanitizeString((CharSequence) value, now, today); + case UNKNOWN: + return "(unknown)"; case BOOLEAN: case UUID: case DECIMAL: case FIXED: case BINARY: case VARIANT: - // for boolean, uuid, decimal, fixed, variant, and binary, match the string result + // for boolean, uuid, decimal, fixed, binary, and variant, match the string result return sanitizeSimpleString(value.toString()); } throw new UnsupportedOperationException( diff --git a/api/src/main/java/org/apache/iceberg/types/Type.java b/api/src/main/java/org/apache/iceberg/types/Type.java index 30870535521f..f4c6f22134a5 100644 --- a/api/src/main/java/org/apache/iceberg/types/Type.java +++ b/api/src/main/java/org/apache/iceberg/types/Type.java @@ -46,7 +46,8 @@ enum TypeID { STRUCT(StructLike.class), LIST(List.class), MAP(Map.class), - 
VARIANT(Object.class); + VARIANT(Object.class), + UNKNOWN(Object.class); private final Class javaClass; diff --git a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java index 7fcf3db3a40d..39f2898757a6 100644 --- a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java +++ b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java @@ -536,6 +536,9 @@ private static int estimateSize(Type type) { case BINARY: case VARIANT: return 80; + case UNKNOWN: + // Consider Unknown as null + return 0; case DECIMAL: // 12 (header) + (12 + 12 + 4) (BigInteger) + 4 (scale) = 44 bytes return 44; diff --git a/api/src/main/java/org/apache/iceberg/types/Types.java b/api/src/main/java/org/apache/iceberg/types/Types.java index 3c03a3defb42..6882f718508b 100644 --- a/api/src/main/java/org/apache/iceberg/types/Types.java +++ b/api/src/main/java/org/apache/iceberg/types/Types.java @@ -55,6 +55,7 @@ private Types() {} .put(StringType.get().toString(), StringType.get()) .put(UUIDType.get().toString(), UUIDType.get()) .put(BinaryType.get().toString(), BinaryType.get()) + .put(UnknownType.get().toString(), UnknownType.get()) .buildOrThrow(); private static final Pattern FIXED = Pattern.compile("fixed\\[\\s*(\\d+)\\s*\\]"); @@ -447,6 +448,24 @@ public int hashCode() { } } + public static class UnknownType extends PrimitiveType { + private static final UnknownType INSTANCE = new UnknownType(); + + public static UnknownType get() { + return INSTANCE; + } + + @Override + public TypeID typeId() { + return TypeID.UNKNOWN; + } + + @Override + public String toString() { + return "unknown"; + } + } + public static class DecimalType extends PrimitiveType { public static DecimalType of(int precision, int scale) { return new DecimalType(precision, scale); diff --git a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java index 971f5a9e4510..ba7010f196a7 
100644 --- a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java +++ b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java @@ -36,7 +36,8 @@ public class TestPartitionSpecValidation { NestedField.required(4, "d", Types.TimestampType.withZone()), NestedField.required(5, "another_d", Types.TimestampType.withZone()), NestedField.required(6, "s", Types.StringType.get()), - NestedField.required(7, "v", Types.VariantType.get())); + NestedField.required(7, "v", Types.VariantType.get()), + NestedField.required(8, "u", Types.UnknownType.get())); @Test public void testMultipleTimestampPartitions() { @@ -325,4 +326,15 @@ public void testVariantUnsupported() { .isInstanceOf(ValidationException.class) .hasMessage("Cannot partition by non-primitive source field: variant"); } + + @Test + public void testUnknownUnsupported() { + assertThatThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA) + .add(8, 1005, "unknown_partition1", Transforms.bucket(5)) + .build()) + .isInstanceOf(ValidationException.class) + .hasMessage("Invalid source type unknown for transform: bucket[5]"); + } } diff --git a/api/src/test/java/org/apache/iceberg/TestSchema.java b/api/src/test/java/org/apache/iceberg/TestSchema.java index e9cb387eebb5..46db60852b3f 100644 --- a/api/src/test/java/org/apache/iceberg/TestSchema.java +++ b/api/src/test/java/org/apache/iceberg/TestSchema.java @@ -41,7 +41,8 @@ public class TestSchema { ImmutableList.of( Types.TimestampNanoType.withoutZone(), Types.TimestampNanoType.withZone(), - Types.VariantType.get()); + Types.VariantType.get(), + Types.UnknownType.get()); private static final Schema INITIAL_DEFAULT_SCHEMA = new Schema( diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java index 5f0cac2b5e8c..3c8ff93a85a3 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java +++ 
b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java @@ -431,6 +431,20 @@ public void testVariantUnsupported() { assertThat(bucket.canTransform(Types.VariantType.get())).isFalse(); } + @Test + public void testUnknownUnsupported() { + assertThatThrownBy(() -> Transforms.bucket(Types.UnknownType.get(), 3)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot bucket by type: unknown"); + + Transform bucket = Transforms.bucket(3); + assertThatThrownBy(() -> bucket.bind(Types.UnknownType.get())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot bucket by type: unknown"); + + assertThat(bucket.canTransform(Types.UnknownType.get())).isFalse(); + } + private byte[] randomBytes(int length) { byte[] bytes = new byte[length]; testRandom.nextBytes(bytes); diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestDates.java b/api/src/test/java/org/apache/iceberg/transforms/TestDates.java index c899b4cfa1cb..625220c10925 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestDates.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestDates.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import org.apache.iceberg.expressions.Literal; import org.apache.iceberg.types.Type; @@ -267,4 +268,46 @@ public void testDatesReturnType() { Type dayResultType = day.getResultType(type); assertThat(dayResultType).isEqualTo(Types.DateType.get()); } + + @Test + public void testUnknownUnsupportedYear() { + assertThatThrownBy(() -> Transforms.year(Types.UnknownType.get())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Unsupported type: unknown"); + + Transform year = Transforms.year(); + assertThatThrownBy(() -> year.bind(Types.UnknownType.get())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Unsupported type: unknown"); + + 
assertThat(year.canTransform(Types.UnknownType.get())).isFalse(); + } + + @Test + public void testUnknownUnsupportedMonth() { + assertThatThrownBy(() -> Transforms.month(Types.UnknownType.get())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Unsupported type: unknown"); + + Transform month = Transforms.month(); + assertThatThrownBy(() -> month.bind(Types.UnknownType.get())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Unsupported type: unknown"); + + assertThat(month.canTransform(Types.UnknownType.get())).isFalse(); + } + + @Test + public void testUnknownUnsupportedDay() { + assertThatThrownBy(() -> Transforms.day(Types.UnknownType.get())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Unsupported type: unknown"); + + Transform day = Transforms.day(); + assertThatThrownBy(() -> day.bind(Types.UnknownType.get())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Unsupported type: unknown"); + + assertThat(day.canTransform(Types.UnknownType.get())).isFalse(); + } } diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java b/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java index b5076e08a947..fc24be8d5698 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java @@ -157,6 +157,16 @@ public void testBigDecimalToHumanString() { .isEqualTo(decimalString); } + @Test + public void testUnknownToHumanString() { + Types.UnknownType unknownType = Types.UnknownType.get(); + Transform identity = Transforms.identity(); + + assertThat(identity.toHumanString(unknownType, null)) + .as("Should produce \"null\" for null") + .isEqualTo("null"); + } + @Test public void testVariantUnsupported() { assertThatThrownBy(() -> Transforms.identity().bind(Types.VariantType.get())) diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java 
b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java index 78b0e67c686b..565426ffe9cd 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import org.apache.iceberg.expressions.Literal; import org.apache.iceberg.types.Type; @@ -633,4 +634,18 @@ public void testTimestampNanosReturnType() { Type hourResultType = hour.getResultType(type); assertThat(hourResultType).isEqualTo(Types.IntegerType.get()); } + + @Test + public void testUnknownUnsupported() { + assertThatThrownBy(() -> Transforms.hour(Types.UnknownType.get())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Unsupported type: unknown"); + + Transform hour = Transforms.hour(); + assertThatThrownBy(() -> hour.bind(Types.UnknownType.get())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Unsupported type: unknown"); + + assertThat(hour.canTransform(Types.UnknownType.get())).isFalse(); + } } diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTruncate.java b/api/src/test/java/org/apache/iceberg/transforms/TestTruncate.java index 68527b0294d3..a6b7a31ef6cd 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTruncate.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTruncate.java @@ -116,4 +116,18 @@ public void testVerifiedIllegalWidth() { .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid truncate width: 0 (must be > 0)"); } + + @Test + public void testUnknownUnsupported() { + assertThatThrownBy(() -> Transforms.truncate(Types.UnknownType.get(), 22)) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage("Cannot truncate type: unknown"); + + Transform truncate = Transforms.truncate(22); + assertThatThrownBy(() -> 
truncate.bind(Types.UnknownType.get())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot bind to unsupported type: unknown"); + + assertThat(truncate.canTransform(Types.UnknownType.get())).isFalse(); + } } diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestVoid.java b/api/src/test/java/org/apache/iceberg/transforms/TestVoid.java new file mode 100644 index 000000000000..95069f5355f9 --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/transforms/TestVoid.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.transforms; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +public class TestVoid { + + @Test + public void testUnknownToHumanString() { + Types.UnknownType unknownType = Types.UnknownType.get(); + Transform alwaysNull = Transforms.alwaysNull(); + + assertThat(alwaysNull.toHumanString(unknownType, null)) + .as("Should produce \"null\" for null") + .isEqualTo("null"); + } +} diff --git a/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java b/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java index af2ebae7e1a8..a222e8e66b8e 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java @@ -136,6 +136,15 @@ public void testVariant() throws Exception { .isEqualTo(variant); } + @Test + public void testUnknown() throws Exception { + Types.UnknownType unknown = Types.UnknownType.get(); + Type copy = TestHelpers.roundTripSerialize(unknown); + assertThat(copy) + .as("Unknown serialization should be equal to starting type") + .isEqualTo(unknown); + } + @Test public void testSchema() throws Exception { Schema schema = diff --git a/api/src/test/java/org/apache/iceberg/types/TestTypes.java b/api/src/test/java/org/apache/iceberg/types/TestTypes.java index 226c53f1e9ce..cbc37291375f 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestTypes.java @@ -44,7 +44,7 @@ public void fromPrimitiveString() { assertThat(Types.fromPrimitiveString("Decimal(2,3)")).isEqualTo(Types.DecimalType.of(2, 3)); assertThatExceptionOfType(IllegalArgumentException.class) - .isThrownBy(() -> Types.fromPrimitiveString("Unknown")) - .withMessageContaining("Unknown"); + .isThrownBy(() -> Types.fromPrimitiveString("abcdefghij")) + .withMessage("Cannot parse type string to primitive: 
abcdefghij"); } } diff --git a/core/src/test/java/org/apache/iceberg/TestSortOrder.java b/core/src/test/java/org/apache/iceberg/TestSortOrder.java index 3d139543b71c..7d0688e9da96 100644 --- a/core/src/test/java/org/apache/iceberg/TestSortOrder.java +++ b/core/src/test/java/org/apache/iceberg/TestSortOrder.java @@ -36,6 +36,7 @@ import org.apache.iceberg.types.Types; import org.apache.iceberg.util.SortOrderUtil; import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestTemplate; import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.api.io.TempDir; @@ -342,6 +343,18 @@ public void testVariantUnsupported() { .hasMessage("Unsupported type for identity: variant"); } + @Test + public void testUnknownSupported() { + int fieldId = 22; + Schema v3Schema = new Schema(Types.NestedField.optional(fieldId, "u", Types.UnknownType.get())); + + SortOrder sortOrder = SortOrder.builderFor(v3Schema).asc("u").build(); + + assertThat(sortOrder.orderId()).isEqualTo(TableMetadata.INITIAL_SORT_ORDER_ID); + assertThat(sortOrder.fields()).hasSize(1); + assertThat(sortOrder.fields().get(0).sourceId()).isEqualTo(fieldId); + } + @TestTemplate public void testPreservingOrderSortedColumnNames() { SortOrder order =