Skip to content

Commit

Permalink
Use spark action to rewrite position deletes
Browse files Browse the repository at this point in the history
  • Loading branch information
szehon-ho committed Dec 7, 2024
1 parent 89d623f commit 6880510
Show file tree
Hide file tree
Showing 7 changed files with 697 additions and 400 deletions.
23 changes: 17 additions & 6 deletions api/src/main/java/org/apache/iceberg/actions/RewriteTablePath.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@
* copied, this will be the root of the copied table.
* <li>A 'copy-plan'. This is a list of all files added to the table between startVersion and
* endVersion, including their original and target paths under the target prefix. This list
* covers both original and rewritten files, allowing for copying to the target paths from the
* copied table.
* covers both original and rewritten files, allowing for copying a functioning version of the
* source table to the target prefix.
* </ol>
*/
public interface RewriteTablePath extends Action<RewriteTablePath, RewriteTablePath.Result> {
Expand Down Expand Up @@ -92,10 +92,21 @@ interface Result {
String stagingLocation();

/**
* Result file list location. This file contains a 'copy plan', a comma-separated list of all
* files added to the table between startVersion and endVersion, including their original and
* target paths under the target prefix. This list covers both original and rewritten files,
* allowing for copying to the target paths from the copied table.
* Result file list location. This file contains a listing of all files added to the table
* between startVersion and endVersion, one file per line. <br>
* Each line is a comma-separated pair: the source path (either the original path in the table,
* or in the staging location if rewritten), followed by the target path (under the new prefix).
*
* <p>Example file content:
*
* <pre><code>
* sourcepath/datafile1.parquet,targetpath/datafile1.parquet
* sourcepath/datafile2.parquet,targetpath/datafile2.parquet
* stagingpath/manifest.avro,targetpath/manifest.avro
* </code></pre>
*
* <br>
* This allows for copying a functioning version of the table to the target prefix.
*/
String fileListLocation();

Expand Down
6 changes: 3 additions & 3 deletions core/src/main/java/org/apache/iceberg/ManifestLists.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;

public class ManifestLists {
class ManifestLists {
private ManifestLists() {}

public static List<ManifestFile> read(InputFile manifestList) {
static List<ManifestFile> read(InputFile manifestList) {
try (CloseableIterable<ManifestFile> files =
Avro.read(manifestList)
.rename("manifest_file", GenericManifestFile.class.getName())
Expand All @@ -50,7 +50,7 @@ public static List<ManifestFile> read(InputFile manifestList) {
}
}

public static ManifestListWriter write(
static ManifestListWriter write(
int formatVersion,
OutputFile manifestListFile,
long snapshotId,
Expand Down
Loading

0 comments on commit 6880510

Please sign in to comment.