Skip to content

Commit

Permalink
Merge branch 'main' into rck-after-each
Browse files Browse the repository at this point in the history
  • Loading branch information
Sreesh Maheshwar committed Jan 18, 2025
2 parents c7dac38 + bed7c33 commit 79a8db8
Show file tree
Hide file tree
Showing 599 changed files with 20,650 additions and 19,706 deletions.
2 changes: 1 addition & 1 deletion .asf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ github:
- marton-bod
- samarthjain
- SreeramGarlapati
- samredai
- gaborkaszab
- bitsondatadev
- ajantha-bhat
- jbonofre
- manuzhang
ghp_branch: gh-pages
ghp_path: /

Expand Down
3 changes: 2 additions & 1 deletion .github/ISSUE_TEMPLATE/iceberg_bug_report.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ body:
description: What Apache Iceberg version are you using?
multiple: false
options:
- "1.7.0 (latest release)"
- "1.7.1 (latest release)"
- "1.7.0"
- "1.6.1"
- "1.6.0"
- "1.5.2"
Expand Down
3 changes: 0 additions & 3 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,7 @@ ORC:
HIVE:
- changed-files:
- any-glob-to-any-file: [
'hive3/**/*',
'hive-metastore/**/*',
'hive-runtime/**/*',
'hive3-orc-bundle/**/*'
]

DATA:
Expand Down
11 changes: 4 additions & 7 deletions .github/workflows/delta-conversion-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,17 @@ on:
- '.github/workflows/jmh-benchmarks-ci.yml'
- '.github/workflows/kafka-connect-ci.yml'
- '.github/workflows/labeler.yml'
- '.github/workflows/licence-check.yml'
- '.github/workflows/license-check.yml'
- '.github/workflows/open-api.yml'
- '.github/workflows/publish-snapshot.yml'
- '.github/workflows/recurring-jmh-benchmarks.yml'
- '.github/workflows/site-ci.yml'
- '.github/workflows/spark-ci.yml'
- '.github/workflows/stale.yml'
- '.gitignore'
- '.asf.yml'
- '.asf.yaml'
- 'dev/**'
- 'mr/**'
- 'hive3/**'
- 'hive3-orc-bundle/**'
- 'hive-runtime/**'
- 'flink/**'
- 'kafka-connect/**'
- 'docs/**'
Expand Down Expand Up @@ -89,7 +86,7 @@ jobs:
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}
restore-keys: ${{ runner.os }}-gradle-
- run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
- run: ./gradlew -DsparkVersions=3.5 -DscalaVersion=2.12 -DhiveVersions= -DkafkaVersions= -DflinkVersions= :iceberg-delta-lake:check -Pquick=true -x javadoc
- run: ./gradlew -DsparkVersions=3.5 -DscalaVersion=2.12 -DkafkaVersions= -DflinkVersions= :iceberg-delta-lake:check -Pquick=true -x javadoc
- uses: actions/upload-artifact@v4
if: failure()
with:
Expand Down Expand Up @@ -118,7 +115,7 @@ jobs:
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}
restore-keys: ${{ runner.os }}-gradle-
- run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
- run: ./gradlew -DsparkVersions=3.5 -DscalaVersion=2.13 -DhiveVersions= -DkafkaVersions= -DflinkVersions= :iceberg-delta-lake:check -Pquick=true -x javadoc
- run: ./gradlew -DsparkVersions=3.5 -DscalaVersion=2.13 -DkafkaVersions= -DflinkVersions= :iceberg-delta-lake:check -Pquick=true -x javadoc
- uses: actions/upload-artifact@v4
if: failure()
with:
Expand Down
9 changes: 3 additions & 6 deletions .github/workflows/flink-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,17 @@ on:
- '.github/workflows/jmh-benchmarks-ci.yml'
- '.github/workflows/kafka-connect-ci.yml'
- '.github/workflows/labeler.yml'
- '.github/workflows/licence-check.yml'
- '.github/workflows/license-check.yml'
- '.github/workflows/open-api.yml'
- '.github/workflows/publish-snapshot.yml'
- '.github/workflows/recurring-jmh-benchmarks.yml'
- '.github/workflows/site-ci.yml'
- '.github/workflows/spark-ci.yml'
- '.github/workflows/stale.yml'
- '.gitignore'
- '.asf.yml'
- '.asf.yaml'
- 'dev/**'
- 'mr/**'
- 'hive3/**'
- 'hive3-orc-bundle/**'
- 'hive-runtime/**'
- 'kafka-connect/**'
- 'spark/**'
- 'docs/**'
Expand Down Expand Up @@ -92,7 +89,7 @@ jobs:
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}
restore-keys: ${{ runner.os }}-gradle-
- run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
- run: ./gradlew -DsparkVersions= -DhiveVersions= -DkafkaVersions= -DflinkVersions=${{ matrix.flink }} :iceberg-flink:iceberg-flink-${{ matrix.flink }}:check :iceberg-flink:iceberg-flink-runtime-${{ matrix.flink }}:check -Pquick=true -x javadoc
- run: ./gradlew -DsparkVersions= -DkafkaVersions= -DflinkVersions=${{ matrix.flink }} :iceberg-flink:iceberg-flink-${{ matrix.flink }}:check :iceberg-flink:iceberg-flink-runtime-${{ matrix.flink }}:check -Pquick=true -x javadoc
- uses: actions/upload-artifact@v4
if: failure()
with:
Expand Down
35 changes: 3 additions & 32 deletions .github/workflows/hive-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,15 @@ on:
- '.github/workflows/jmh-benchmarks-ci.yml'
- '.github/workflows/kafka-connect-ci.yml'
- '.github/workflows/labeler.yml'
- '.github/workflows/licence-check.yml'
- '.github/workflows/license-check.yml'
- '.github/workflows/open-api.yml'
- '.github/workflows/publish-snapshot.yml'
- '.github/workflows/recurring-jmh-benchmarks.yml'
- '.github/workflows/site-ci.yml'
- '.github/workflows/spark-ci.yml'
- '.github/workflows/stale.yml'
- '.gitignore'
- '.asf.yml'
- '.asf.yaml'
- 'dev/**'
- 'arrow/**'
- 'spark/**'
Expand Down Expand Up @@ -87,36 +87,7 @@ jobs:
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}
restore-keys: ${{ runner.os }}-gradle-
- run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
- run: ./gradlew -DsparkVersions= -DhiveVersions=2 -DflinkVersions= -DkafkaVersions= -Pquick=true :iceberg-mr:check :iceberg-hive-runtime:check -x javadoc
- uses: actions/upload-artifact@v4
if: failure()
with:
name: test logs
path: |
**/build/testlogs
hive3-tests:
runs-on: ubuntu-22.04
strategy:
matrix:
jvm: [11, 17, 21]
env:
SPARK_LOCAL_IP: localhost
steps:
- uses: actions/checkout@v4
- uses: actions/setup-java@v4
with:
distribution: zulu
java-version: ${{ matrix.jvm }}
- uses: actions/cache@v4
with:
path: |
~/.gradle/caches
~/.gradle/wrapper
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}
restore-keys: ${{ runner.os }}-gradle-
- run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
- run: ./gradlew -DsparkVersions= -DhiveVersions=3 -DflinkVersions= -DkafkaVersions= -Pquick=true :iceberg-hive3-orc-bundle:check :iceberg-hive3:check :iceberg-hive-runtime:check -x javadoc
- run: ./gradlew -DsparkVersions= -DflinkVersions= -DkafkaVersions= -Pquick=true :iceberg-mr:check -x javadoc
- uses: actions/upload-artifact@v4
if: failure()
with:
Expand Down
7 changes: 3 additions & 4 deletions .github/workflows/java-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,19 +37,18 @@ on:
- '.github/workflows/jmh-benchmarks-ci.yml'
- '.github/workflows/kafka-connect-ci.yml'
- '.github/workflows/labeler.yml'
- '.github/workflows/licence-check.yml'
- '.github/workflows/license-check.yml'
- '.github/workflows/open-api.yml'
- '.github/workflows/publish-snapshot.yml'
- '.github/workflows/recurring-jmh-benchmarks.yml'
- '.github/workflows/site-ci.yml'
- '.github/workflows/spark-ci.yml'
- '.github/workflows/stale.yml'
- '.gitignore'
- '.asf.yml'
- '.asf.yaml'
- 'dev/**'
- 'docs/**'
- 'site/**'
- 'open-api/**'
- 'format/**'
- '.gitattributes'
- 'README.md'
Expand Down Expand Up @@ -83,7 +82,7 @@ jobs:
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}
restore-keys: ${{ runner.os }}-gradle-
- run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
- run: ./gradlew check -DsparkVersions= -DhiveVersions= -DflinkVersions= -DkafkaVersions= -Pquick=true -x javadoc
- run: ./gradlew check -DsparkVersions= -DflinkVersions= -DkafkaVersions= -Pquick=true -x javadoc
- uses: actions/upload-artifact@v4
if: failure()
with:
Expand Down
9 changes: 3 additions & 6 deletions .github/workflows/kafka-connect-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,18 @@ on:
- '.github/workflows/java-ci.yml'
- '.github/workflows/jmh-benchmarks-ci.yml'
- '.github/workflows/labeler.yml'
- '.github/workflows/licence-check.yml'
- '.github/workflows/license-check.yml'
- '.github/workflows/open-api.yml'
- '.github/workflows/publish-snapshot.yml'
- '.github/workflows/recurring-jmh-benchmarks.yml'
- '.github/workflows/site-ci.yml'
- '.github/workflows/spark-ci.yml'
- '.github/workflows/stale.yml'
- '.gitignore'
- '.asf.yml'
- '.asf.yaml'
- 'dev/**'
- 'mr/**'
- 'flink/**'
- 'hive3/**'
- 'hive3-orc-bundle/**'
- 'hive-runtime/**'
- 'spark/**'
- 'docs/**'
- 'site/**'
Expand Down Expand Up @@ -91,7 +88,7 @@ jobs:
restore-keys: ${{ runner.os }}-gradle-
- run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
- run: |
./gradlew -DsparkVersions= -DhiveVersions= -DflinkVersions= -DkafkaVersions=3 \
./gradlew -DsparkVersions= -DflinkVersions= -DkafkaVersions=3 \
:iceberg-kafka-connect:iceberg-kafka-connect-events:check \
:iceberg-kafka-connect:iceberg-kafka-connect:check \
:iceberg-kafka-connect:iceberg-kafka-connect-runtime:check \
Expand Down
17 changes: 12 additions & 5 deletions .github/workflows/publish-iceberg-rest-fixture-docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,15 @@ jobs:
if: github.event_name == 'push' && contains(github.ref, 'refs/tags/')
run: |
echo "DOCKER_IMAGE_VERSION=`echo ${{ github.ref }} | tr -d -c 0-9.`" >> "$GITHUB_ENV"
- name: Build Docker Image
run: docker build -t $DOCKER_REPOSITORY/$DOCKER_IMAGE_TAG:$DOCKER_IMAGE_VERSION -f docker/iceberg-rest-fixture/Dockerfile .
- name: Push Docker Image
run: |
docker push $DOCKER_REPOSITORY/$DOCKER_IMAGE_TAG:$DOCKER_IMAGE_VERSION
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Build and Push
uses: docker/build-push-action@v6
with:
context: ./
file: ./docker/iceberg-rest-fixture/Dockerfile
platforms: linux/amd64,linux/arm64
push: true
tags: ${{ env.DOCKER_REPOSITORY }}/${{ env.DOCKER_IMAGE_TAG }}:${{ env.DOCKER_IMAGE_VERSION }}
2 changes: 1 addition & 1 deletion .github/workflows/publish-snapshot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,4 @@ jobs:
- run: |
./gradlew printVersion
./gradlew -DallModules publishApachePublicationToMavenRepository -PmavenUser=${{ secrets.NEXUS_USER }} -PmavenPassword=${{ secrets.NEXUS_PW }}
./gradlew -DflinkVersions= -DsparkVersions=3.3,3.4,3.5 -DscalaVersion=2.13 -DkafkaVersions=3 -DhiveVersions= publishApachePublicationToMavenRepository -PmavenUser=${{ secrets.NEXUS_USER }} -PmavenPassword=${{ secrets.NEXUS_PW }}
./gradlew -DflinkVersions= -DsparkVersions=3.3,3.4,3.5 -DscalaVersion=2.13 -DkafkaVersions=3 publishApachePublicationToMavenRepository -PmavenUser=${{ secrets.NEXUS_USER }} -PmavenPassword=${{ secrets.NEXUS_PW }}
9 changes: 3 additions & 6 deletions .github/workflows/spark-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,17 @@ on:
- '.github/workflows/jmh-benchmarks-ci.yml'
- '.github/workflows/kafka-connect-ci.yml'
- '.github/workflows/labeler.yml'
- '.github/workflows/licence-check.yml'
- '.github/workflows/license-check.yml'
- '.github/workflows/open-api.yml'
- '.github/workflows/publish-snapshot.yml'
- '.github/workflows/recurring-jmh-benchmarks.yml'
- '.github/workflows/site-ci.yml'
- '.github/workflows/stale.yml'
- '.gitignore'
- '.asf.yml'
- '.asf.yaml'
- 'dev/**'
- 'site/**'
- 'mr/**'
- 'hive3/**'
- 'hive3-orc-bundle/**'
- 'hive-runtime/**'
- 'flink/**'
- 'kafka-connect/**'
- 'docs/**'
Expand Down Expand Up @@ -102,7 +99,7 @@ jobs:
tool-cache: false
- run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
- run: |
./gradlew -DsparkVersions=${{ matrix.spark }} -DscalaVersion=${{ matrix.scala }} -DhiveVersions= -DflinkVersions= -DkafkaVersions= \
./gradlew -DsparkVersions=${{ matrix.spark }} -DscalaVersion=${{ matrix.scala }} -DflinkVersions= -DkafkaVersions= \
:iceberg-spark:iceberg-spark-${{ matrix.spark }}_${{ matrix.scala }}:check \
:iceberg-spark:iceberg-spark-extensions-${{ matrix.spark }}_${{ matrix.scala }}:check \
:iceberg-spark:iceberg-spark-runtime-${{ matrix.spark }}_${{ matrix.scala }}:check \
Expand Down
26 changes: 26 additions & 0 deletions .palantir/revapi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1145,6 +1145,32 @@ acceptedBreaks:
new: "method org.apache.iceberg.BaseMetastoreOperations.CommitStatus org.apache.iceberg.BaseMetastoreTableOperations::checkCommitStatus(java.lang.String,\
\ org.apache.iceberg.TableMetadata)"
justification: "Removing deprecated code"
"1.7.0":
org.apache.iceberg:iceberg-core:
- code: "java.method.removed"
old: "method <T extends org.apache.iceberg.StructLike> org.apache.iceberg.deletes.PositionDeleteIndex\
\ org.apache.iceberg.deletes.Deletes::toPositionIndex(java.lang.CharSequence,\
\ java.util.List<org.apache.iceberg.io.CloseableIterable<T>>)"
justification: "Removing deprecated code"
- code: "java.method.removed"
old: "method <T extends org.apache.iceberg.StructLike> org.apache.iceberg.deletes.PositionDeleteIndex\
\ org.apache.iceberg.deletes.Deletes::toPositionIndex(java.lang.CharSequence,\
\ java.util.List<org.apache.iceberg.io.CloseableIterable<T>>, java.util.concurrent.ExecutorService)"
justification: "Removing deprecated code"
- code: "java.method.removed"
old: "method <T> org.apache.iceberg.io.CloseableIterable<T> org.apache.iceberg.deletes.Deletes::streamingFilter(org.apache.iceberg.io.CloseableIterable<T>,\
\ java.util.function.Function<T, java.lang.Long>, org.apache.iceberg.io.CloseableIterable<java.lang.Long>)"
justification: "Removing deprecated code"
- code: "java.method.removed"
old: "method <T> org.apache.iceberg.io.CloseableIterable<T> org.apache.iceberg.deletes.Deletes::streamingFilter(org.apache.iceberg.io.CloseableIterable<T>,\
\ java.util.function.Function<T, java.lang.Long>, org.apache.iceberg.io.CloseableIterable<java.lang.Long>,\
\ org.apache.iceberg.deletes.DeleteCounter)"
justification: "Removing deprecated code"
- code: "java.method.removed"
old: "method <T> org.apache.iceberg.io.CloseableIterable<T> org.apache.iceberg.deletes.Deletes::streamingMarker(org.apache.iceberg.io.CloseableIterable<T>,\
\ java.util.function.Function<T, java.lang.Long>, org.apache.iceberg.io.CloseableIterable<java.lang.Long>,\
\ java.util.function.Consumer<T>)"
justification: "Removing deprecated code"
apache-iceberg-0.14.0:
org.apache.iceberg:iceberg-api:
- code: "java.class.defaultSerializationChanged"
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ Iceberg also has modules for adding Iceberg support to processing engines:
---
**NOTE**

The tests require Docker to execute. On MacOS (with Docker Desktop), you might need to create a symbolic name to the docker socket in order to be detected by the tests:
The tests require Docker to execute. On macOS (with Docker Desktop), you might need to create a symbolic link to the Docker socket so that the tests can detect it:

```
sudo ln -s $HOME/.docker/run/docker.sock /var/run/docker.sock
Expand All @@ -97,3 +97,4 @@ This repository contains the Java implementation of Iceberg. Other implementatio
* **Go**: [iceberg-go](https://github.com/apache/iceberg-go)
* **PyIceberg** (Python): [iceberg-python](https://github.com/apache/iceberg-python)
* **Rust**: [iceberg-rust](https://github.com/apache/iceberg-rust)
* **C++**: [iceberg-cpp](https://github.com/apache/iceberg-cpp)
11 changes: 11 additions & 0 deletions api/src/main/java/org/apache/iceberg/ExpireSnapshots.java
Original file line number Diff line number Diff line change
Expand Up @@ -118,4 +118,15 @@ public interface ExpireSnapshots extends PendingUpdate<List<Snapshot>> {
* @return this for method chaining
*/
ExpireSnapshots cleanExpiredFiles(boolean clean);

/**
* Enable cleaning up unused metadata, such as partition specs, schemas, etc.
*
* <p>This default implementation performs no cleanup: it always throws, naming the runtime class
* in the exception message, so implementations that support this option must override it.
*
* @param clean remove unused partition specs, schemas, or other metadata when true
* @return this for method chaining
* @throws UnsupportedOperationException if the implementing class does not override this method
*/
default ExpireSnapshots cleanExpiredMetadata(boolean clean) {
throw new UnsupportedOperationException(
this.getClass().getName() + " doesn't implement cleanExpiredMetadata");
}
}
2 changes: 1 addition & 1 deletion api/src/main/java/org/apache/iceberg/ScanTask.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public interface ScanTask extends Serializable {
* @return the total number of bytes to read
*/
default long sizeBytes() {
return 4 * 1028 * 1028; // 4 MB
return 4 * 1024 * 1024; // 4 MB
}

/**
Expand Down
25 changes: 19 additions & 6 deletions api/src/main/java/org/apache/iceberg/actions/RewriteTablePath.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,10 @@
* <ol>
* <li>The name of the latest metadata.json rewritten to staging location. After the files are
* copied, this will be the root of the copied table.
* <li>A list of all files added to the table between startVersion and endVersion, including their
* original and target paths under the target prefix. This list covers both original and
* rewritten files, allowing for copying to the target paths to form the copied table.
* <li>A 'copy-plan'. This is a list of all files added to the table between startVersion and
* endVersion, including their original and target paths under the target prefix. This list
* covers both original and rewritten files, allowing for copying a functioning version of the
* source table to the target prefix.
* </ol>
*/
public interface RewriteTablePath extends Action<RewriteTablePath, RewriteTablePath.Result> {
Expand Down Expand Up @@ -91,9 +92,21 @@ interface Result {
String stagingLocation();

/**
* Path to a comma-separated list of source and target paths for all files added to the table
* between startVersion and endVersion, including original data files and metadata files
* rewritten to staging.
* Result file list location. This file contains a listing of all files added to the table
* between startVersion and endVersion, comma-separated. <br>
* For each file, it will include the source path (either the original path in the table, or in
* the staging location if rewritten), and the target path (under the new prefix).
*
* <p>Example file content:
*
* <pre><code>
* sourcepath/datafile1.parquet,targetpath/datafile1.parquet
* sourcepath/datafile2.parquet,targetpath/datafile2.parquet
* stagingpath/manifest.avro,targetpath/manifest.avro
* </code></pre>
*
* <br>
* This allows for copying a functioning version of the table to the target prefix.
*/
String fileListLocation();

Expand Down
Loading

0 comments on commit 79a8db8

Please sign in to comment.