From 1b623c573aaed55e8317ed868e1f0f329a00a816 Mon Sep 17 00:00:00 2001 From: Christian Thiel Date: Mon, 20 Jan 2025 09:02:33 +0100 Subject: [PATCH 1/7] Deprecate snapshot-id of SetStatisticsUpdate --- .palantir/revapi.yml | 4 ++++ .../org/apache/iceberg/UpdateStatistics.java | 10 ++++++++++ .../java/org/apache/iceberg/MetadataUpdate.java | 17 +++++++++++++---- .../apache/iceberg/MetadataUpdateParser.java | 3 +-- .../java/org/apache/iceberg/SetStatistics.java | 14 +++++++++++++- .../java/org/apache/iceberg/TableMetadata.java | 15 ++++++++++++++- .../iceberg/TestMetadataUpdateParser.java | 1 - open-api/rest-catalog-open-api.py | 2 +- open-api/rest-catalog-open-api.yaml | 4 +++- 9 files changed, 59 insertions(+), 11 deletions(-) diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml index 18c63fbe7bb1..fef4d663202d 100644 --- a/.palantir/revapi.yml +++ b/.palantir/revapi.yml @@ -1146,6 +1146,10 @@ acceptedBreaks: \ org.apache.iceberg.TableMetadata)" justification: "Removing deprecated code" "1.7.0": + org.apache.iceberg:iceberg-api: + - code: "java.method.addedToInterface" + new: "method org.apache.iceberg.UpdateStatistics org.apache.iceberg.UpdateStatistics::setStatistics(org.apache.iceberg.StatisticsFile)" + justification: "Redundant snapshot-id in UpdateStatistics" org.apache.iceberg:iceberg-core: - code: "java.method.removed" old: "method org.apache.iceberg.deletes.PositionDeleteIndex\ diff --git a/api/src/main/java/org/apache/iceberg/UpdateStatistics.java b/api/src/main/java/org/apache/iceberg/UpdateStatistics.java index 6e367122dcc9..e6ce843f1d2a 100644 --- a/api/src/main/java/org/apache/iceberg/UpdateStatistics.java +++ b/api/src/main/java/org/apache/iceberg/UpdateStatistics.java @@ -27,9 +27,19 @@ public interface UpdateStatistics extends PendingUpdate> { * the snapshot if any exists. * * @return this for method chaining + * @deprecated since 1.8.0, will be removed 1.9.0 or 2.0.0, use setStatistics(statisticsFile). */ + @Deprecated UpdateStatistics setStatistics(long snapshotId, StatisticsFile statisticsFile); + /** + * Set the table's statistics file for given snapshot, replacing the previous statistics file for + * the snapshot if any exists. + * + * @return this for method chaining + */ + UpdateStatistics setStatistics(StatisticsFile statisticsFile); + /** * Remove the table's statistics file for given snapshot. * diff --git a/core/src/main/java/org/apache/iceberg/MetadataUpdate.java b/core/src/main/java/org/apache/iceberg/MetadataUpdate.java index ff008c9d43e2..b76fcf9f017d 100644 --- a/core/src/main/java/org/apache/iceberg/MetadataUpdate.java +++ b/core/src/main/java/org/apache/iceberg/MetadataUpdate.java @@ -231,16 +231,25 @@ public void applyTo(TableMetadata.Builder metadataBuilder) { } class SetStatistics implements MetadataUpdate { - private final long snapshotId; private final StatisticsFile statisticsFile; + /** + * Set statistics for a snapshot. + * + * @deprecated since 1.8.0, will be removed in 1.9.0 or 2.0.0, use + * SetStatistics(statisticsFile). + */ + @Deprecated public SetStatistics(long snapshotId, StatisticsFile statisticsFile) { - this.snapshotId = snapshotId; + this.statisticsFile = statisticsFile; + } + + public SetStatistics(StatisticsFile statisticsFile) { this.statisticsFile = statisticsFile; } public long snapshotId() { - return snapshotId; + return statisticsFile.snapshotId(); } public StatisticsFile statisticsFile() { @@ -249,7 +258,7 @@ public StatisticsFile statisticsFile() { @Override public void applyTo(TableMetadata.Builder metadataBuilder) { - metadataBuilder.setStatistics(snapshotId, statisticsFile); + metadataBuilder.setStatistics(statisticsFile); } } diff --git a/core/src/main/java/org/apache/iceberg/MetadataUpdateParser.java b/core/src/main/java/org/apache/iceberg/MetadataUpdateParser.java index 08d4b3398f10..6a6a34c92d18 100644 --- a/core/src/main/java/org/apache/iceberg/MetadataUpdateParser.java +++ b/core/src/main/java/org/apache/iceberg/MetadataUpdateParser.java @@ -512,10 +512,9 @@ private static MetadataUpdate readSetDefaultSortOrder(JsonNode node) { } private static MetadataUpdate readSetStatistics(JsonNode node) { - long snapshotId = JsonUtil.getLong(SNAPSHOT_ID, node); JsonNode statisticsFileNode = JsonUtil.get(STATISTICS, node); StatisticsFile statisticsFile = StatisticsFileParser.fromJson(statisticsFileNode); - return new MetadataUpdate.SetStatistics(snapshotId, statisticsFile); + return new MetadataUpdate.SetStatistics(statisticsFile); } private static MetadataUpdate readRemoveStatistics(JsonNode node) { diff --git a/core/src/main/java/org/apache/iceberg/SetStatistics.java b/core/src/main/java/org/apache/iceberg/SetStatistics.java index 41c7254d6cdc..7cbdff61634d 100644 --- a/core/src/main/java/org/apache/iceberg/SetStatistics.java +++ b/core/src/main/java/org/apache/iceberg/SetStatistics.java @@ -32,10 +32,22 @@ public SetStatistics(TableOperations ops) { this.ops = ops; } + /** + * Add a new schema. + * + * @deprecated since 1.8.0, will be removed in 1.9.0 or 2.0.0, use setStatistics(statisticsFile). + */ + @Deprecated @Override public UpdateStatistics setStatistics(long snapshotId, StatisticsFile statisticsFile) { Preconditions.checkArgument(snapshotId == statisticsFile.snapshotId()); - statisticsToSet.put(snapshotId, Optional.of(statisticsFile)); + statisticsToSet.put(statisticsFile.snapshotId(), Optional.of(statisticsFile)); + return this; + } + + @Override + public UpdateStatistics setStatistics(StatisticsFile statisticsFile) { + statisticsToSet.put(statisticsFile.snapshotId(), Optional.of(statisticsFile)); return this; } diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java b/core/src/main/java/org/apache/iceberg/TableMetadata.java index 4ba3bdf8d737..a532ff00ee4e 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadata.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java @@ -1319,6 +1319,12 @@ private Builder resetMainBranch() { return this; } + /** + * Set a statistics file for a snapshot. + * + * @deprecated since 1.8.0, will be removed 1.9.0 or 2.0.0, use setStatistics(statisticsFile). + */ + @Deprecated public Builder setStatistics(long snapshotId, StatisticsFile statisticsFile) { Preconditions.checkNotNull(statisticsFile, "statisticsFile is null"); Preconditions.checkArgument( @@ -1327,7 +1333,14 @@ public Builder setStatistics(long snapshotId, StatisticsFile statisticsFile) { snapshotId, statisticsFile.snapshotId()); statisticsFiles.put(statisticsFile.snapshotId(), ImmutableList.of(statisticsFile)); - changes.add(new MetadataUpdate.SetStatistics(snapshotId, statisticsFile)); + changes.add(new MetadataUpdate.SetStatistics(statisticsFile)); + return this; + } + + public Builder setStatistics(StatisticsFile statisticsFile) { + Preconditions.checkNotNull(statisticsFile, "statisticsFile is null"); + statisticsFiles.put(statisticsFile.snapshotId(), ImmutableList.of(statisticsFile)); + changes.add(new MetadataUpdate.SetStatistics(statisticsFile)); return this; } diff --git a/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java b/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java index 6a65bf762880..1ce319a34eba 100644 --- a/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java +++ b/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java @@ -783,7 +783,6 @@ public void testSetStatistics() { long snapshotId = 1940541653261589030L; MetadataUpdate expected = new MetadataUpdate.SetStatistics( - snapshotId, new GenericStatisticsFile( snapshotId, "s3://bucket/warehouse/stats.puffin", diff --git a/open-api/rest-catalog-open-api.py b/open-api/rest-catalog-open-api.py index c3ec276b9e8b..06e7e5c98f02 100644 --- a/open-api/rest-catalog-open-api.py +++ b/open-api/rest-catalog-open-api.py @@ -1024,7 +1024,7 @@ class Term(BaseModel): class SetStatisticsUpdate(BaseUpdate): action: str = Field('set-statistics', const=True) - snapshot_id: int = Field(..., alias='snapshot-id') + snapshot_id: Optional[int] = Field(None, alias='snapshot-id') statistics: StatisticsFile diff --git a/open-api/rest-catalog-open-api.yaml b/open-api/rest-catalog-open-api.yaml index 5df05094a86d..b5032dcf81fb 100644 --- a/open-api/rest-catalog-open-api.yaml +++ b/open-api/rest-catalog-open-api.yaml @@ -2881,7 +2881,6 @@ components: allOf: - $ref: '#/components/schemas/BaseUpdate' required: - - snapshot-id - statistics properties: action: @@ -2890,6 +2889,9 @@ components: snapshot-id: type: integer format: int64 + description: + This optional field is **DEPRECATED for REMOVAL** since it contains redundant information. + Clients should use the `statistics.snapshot-id` field instead. statistics: $ref: '#/components/schemas/StatisticsFile' From 4c6a00ecc0fdb56e92ef0fd0d5b09478d2e02e63 Mon Sep 17 00:00:00 2001 From: Christian Thiel Date: Mon, 20 Jan 2025 09:07:15 +0100 Subject: [PATCH 2/7] update rest spec py --- open-api/rest-catalog-open-api.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/open-api/rest-catalog-open-api.py b/open-api/rest-catalog-open-api.py index 06e7e5c98f02..1a4e94b76d26 100644 --- a/open-api/rest-catalog-open-api.py +++ b/open-api/rest-catalog-open-api.py @@ -1024,7 +1024,11 @@ class Term(BaseModel): class SetStatisticsUpdate(BaseUpdate): action: str = Field('set-statistics', const=True) - snapshot_id: Optional[int] = Field(None, alias='snapshot-id') + snapshot_id: Optional[int] = Field( + None, + alias='snapshot-id', + description='This optional field is **DEPRECATED for REMOVAL** since it contains redundant information. Clients should use the `statistics.snapshot-id` field instead.', + ) statistics: StatisticsFile From 4e5a0627b9ed2880d52df2c8e0822b9cb84fba04 Mon Sep 17 00:00:00 2001 From: Christian Date: Mon, 20 Jan 2025 14:53:37 +0100 Subject: [PATCH 3/7] Update api/src/main/java/org/apache/iceberg/UpdateStatistics.java Co-authored-by: Eduard Tudenhoefner --- api/src/main/java/org/apache/iceberg/UpdateStatistics.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/api/src/main/java/org/apache/iceberg/UpdateStatistics.java b/api/src/main/java/org/apache/iceberg/UpdateStatistics.java index e6ce843f1d2a..ee03160d94d4 100644 --- a/api/src/main/java/org/apache/iceberg/UpdateStatistics.java +++ b/api/src/main/java/org/apache/iceberg/UpdateStatistics.java @@ -38,7 +38,9 @@ public interface UpdateStatistics extends PendingUpdate> { * * @return this for method chaining */ - UpdateStatistics setStatistics(StatisticsFile statisticsFile); + default UpdateStatistics setStatistics(StatisticsFile statisticsFile) { + throw new UnsupportedOperationException("Setting statistics is not supported"); + } /** * Remove the table's statistics file for given snapshot. From 14e6ad19a17b84c2f7e6d79ccf8d00609aef2977 Mon Sep 17 00:00:00 2001 From: Christian Thiel Date: Mon, 20 Jan 2025 14:55:46 +0100 Subject: [PATCH 4/7] Fix typo, remove revapi change --- .palantir/revapi.yml | 4 ---- core/src/main/java/org/apache/iceberg/SetStatistics.java | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml index fef4d663202d..18c63fbe7bb1 100644 --- a/.palantir/revapi.yml +++ b/.palantir/revapi.yml @@ -1146,10 +1146,6 @@ acceptedBreaks: \ org.apache.iceberg.TableMetadata)" justification: "Removing deprecated code" "1.7.0": - org.apache.iceberg:iceberg-api: - - code: "java.method.addedToInterface" - new: "method org.apache.iceberg.UpdateStatistics org.apache.iceberg.UpdateStatistics::setStatistics(org.apache.iceberg.StatisticsFile)" - justification: "Redundant snapshot-id in UpdateStatistics" org.apache.iceberg:iceberg-core: - code: "java.method.removed" old: "method org.apache.iceberg.deletes.PositionDeleteIndex\ diff --git a/core/src/main/java/org/apache/iceberg/SetStatistics.java b/core/src/main/java/org/apache/iceberg/SetStatistics.java index 7cbdff61634d..e127a1806a6b 100644 --- a/core/src/main/java/org/apache/iceberg/SetStatistics.java +++ b/core/src/main/java/org/apache/iceberg/SetStatistics.java @@ -33,7 +33,7 @@ public SetStatistics(TableOperations ops) { } /** - * Add a new schema. + * Set the statistics file for a snapshot. * * @deprecated since 1.8.0, will be removed in 1.9.0 or 2.0.0, use setStatistics(statisticsFile). */ From eafdf5cc3dbce6df04ca461968953be791d58ec9 Mon Sep 17 00:00:00 2001 From: Christian Thiel Date: Mon, 20 Jan 2025 15:20:59 +0100 Subject: [PATCH 5/7] Address comments --- api/src/main/java/org/apache/iceberg/UpdateStatistics.java | 2 +- core/src/main/java/org/apache/iceberg/SetStatistics.java | 2 +- open-api/rest-catalog-open-api.yaml | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/UpdateStatistics.java b/api/src/main/java/org/apache/iceberg/UpdateStatistics.java index ee03160d94d4..35896713a0b2 100644 --- a/api/src/main/java/org/apache/iceberg/UpdateStatistics.java +++ b/api/src/main/java/org/apache/iceberg/UpdateStatistics.java @@ -27,7 +27,7 @@ public interface UpdateStatistics extends PendingUpdate> { * the snapshot if any exists. * * @return this for method chaining - * @deprecated since 1.8.0, will be removed 1.9.0 or 2.0.0, use setStatistics(statisticsFile). + * @deprecated since 1.8.0, will be removed 1.9.0 or 2.0.0, use {@link #setStatistics(StatisticsFile)}. */ @Deprecated UpdateStatistics setStatistics(long snapshotId, StatisticsFile statisticsFile); diff --git a/core/src/main/java/org/apache/iceberg/SetStatistics.java b/core/src/main/java/org/apache/iceberg/SetStatistics.java index e127a1806a6b..ca09bc80fc98 100644 --- a/core/src/main/java/org/apache/iceberg/SetStatistics.java +++ b/core/src/main/java/org/apache/iceberg/SetStatistics.java @@ -35,7 +35,7 @@ public SetStatistics(TableOperations ops) { /** * Set the statistics file for a snapshot. * - * @deprecated since 1.8.0, will be removed in 1.9.0 or 2.0.0, use setStatistics(statisticsFile). + * @deprecated since 1.8.0, will be removed in 1.9.0 or 2.0.0, use {@link #setStatistics(StatisticsFile)}. */ @Deprecated @Override diff --git a/open-api/rest-catalog-open-api.yaml b/open-api/rest-catalog-open-api.yaml index b5032dcf81fb..f061766f582b 100644 --- a/open-api/rest-catalog-open-api.yaml +++ b/open-api/rest-catalog-open-api.yaml @@ -2889,6 +2889,7 @@ components: snapshot-id: type: integer format: int64 + deprecated: true description: This optional field is **DEPRECATED for REMOVAL** since it contains redundant information. Clients should use the `statistics.snapshot-id` field instead. From 2e79b9508006feca3755c98d4acb4b026eaf17ac Mon Sep 17 00:00:00 2001 From: Christian Thiel Date: Mon, 20 Jan 2025 17:13:06 +0100 Subject: [PATCH 6/7] spotless --- api/src/main/java/org/apache/iceberg/UpdateStatistics.java | 3 ++- core/src/main/java/org/apache/iceberg/SetStatistics.java | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/UpdateStatistics.java b/api/src/main/java/org/apache/iceberg/UpdateStatistics.java index 35896713a0b2..e1d5269325e6 100644 --- a/api/src/main/java/org/apache/iceberg/UpdateStatistics.java +++ b/api/src/main/java/org/apache/iceberg/UpdateStatistics.java @@ -27,7 +27,8 @@ public interface UpdateStatistics extends PendingUpdate> { * the snapshot if any exists. * * @return this for method chaining - * @deprecated since 1.8.0, will be removed 1.9.0 or 2.0.0, use {@link #setStatistics(StatisticsFile)}. + * @deprecated since 1.8.0, will be removed 1.9.0 or 2.0.0, use {@link + * #setStatistics(StatisticsFile)}. */ @Deprecated UpdateStatistics setStatistics(long snapshotId, StatisticsFile statisticsFile); diff --git a/core/src/main/java/org/apache/iceberg/SetStatistics.java b/core/src/main/java/org/apache/iceberg/SetStatistics.java index ca09bc80fc98..b5ec801e0aaf 100644 --- a/core/src/main/java/org/apache/iceberg/SetStatistics.java +++ b/core/src/main/java/org/apache/iceberg/SetStatistics.java @@ -35,7 +35,8 @@ public SetStatistics(TableOperations ops) { /** * Set the statistics file for a snapshot. * - * @deprecated since 1.8.0, will be removed in 1.9.0 or 2.0.0, use {@link #setStatistics(StatisticsFile)}. + * @deprecated since 1.8.0, will be removed in 1.9.0 or 2.0.0, use {@link + * #setStatistics(StatisticsFile)}. */ @Deprecated @Override From 75a349409ba36c50eedf017abe0d77ec3114cfff Mon Sep 17 00:00:00 2001 From: Christian Thiel Date: Wed, 22 Jan 2025 08:26:11 +0100 Subject: [PATCH 7/7] extend docstring --- api/src/main/java/org/apache/iceberg/UpdateStatistics.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/src/main/java/org/apache/iceberg/UpdateStatistics.java b/api/src/main/java/org/apache/iceberg/UpdateStatistics.java index e1d5269325e6..c595b1670893 100644 --- a/api/src/main/java/org/apache/iceberg/UpdateStatistics.java +++ b/api/src/main/java/org/apache/iceberg/UpdateStatistics.java @@ -35,7 +35,7 @@ public interface UpdateStatistics extends PendingUpdate> { /** * Set the table's statistics file for given snapshot, replacing the previous statistics file for - * the snapshot if any exists. + * the snapshot if any exists. The snapshot id of the statistics file will be used. * * @return this for method chaining */