
Added support to write metadata files in parquet #5105

Merged
merged 50 commits on Apr 2, 2024

Commits (50)
f8ae4ef
Copied methods from inside parquet hadoop to write metadata files
malhotrashivam Jan 30, 2024
0ef7f79
Calling methods from inside ParquetHadoop for writing metadata files
malhotrashivam Jan 31, 2024
44d031f
Cleaned up the code and added a lot of TODOs for review
malhotrashivam Feb 13, 2024
6394bac
Merge branch 'main' into sm-pq-metadata
malhotrashivam Feb 13, 2024
6c6b075
Some more changes
malhotrashivam Feb 13, 2024
9263aa0
WIP commit
malhotrashivam Feb 14, 2024
8c887c4
Added a custom metadata file writer
malhotrashivam Feb 16, 2024
05400c5
Merge branch 'main' into sm-pq-metadata
malhotrashivam Feb 16, 2024
b696649
Minor fix
malhotrashivam Feb 16, 2024
797b3dc
Fixed failing test
malhotrashivam Feb 17, 2024
d987a06
Moved some code around
malhotrashivam Feb 20, 2024
d4da175
Minor change
malhotrashivam Feb 20, 2024
f33d712
Review comments
malhotrashivam Feb 22, 2024
7d66445
Merge branch 'main' into sm-pq-metadata
malhotrashivam Feb 26, 2024
238f5f5
Read offset index from column chunk on demand
malhotrashivam Feb 26, 2024
7a3652d
Fixed failing test
malhotrashivam Feb 26, 2024
1f41eef
Added support for partitioned parquet writing
malhotrashivam Feb 27, 2024
c8aa764
Added some more tests
malhotrashivam Feb 27, 2024
ff99a36
Added some more tests
malhotrashivam Feb 27, 2024
3e48937
Added a new API for writing a partitioned table directly
malhotrashivam Feb 28, 2024
e584343
Improved the tests
malhotrashivam Feb 29, 2024
f659f3c
Review with Ryan part 1
malhotrashivam Mar 4, 2024
3651e5f
Added more tests
malhotrashivam Mar 4, 2024
51eefe1
Iterating using chunked iterators
malhotrashivam Mar 4, 2024
60ee1f5
Removed some unnecessary includes
malhotrashivam Mar 4, 2024
5c7353f
Added support for {index} and {partition} in file basename
malhotrashivam Mar 4, 2024
c174452
Review comments
malhotrashivam Mar 5, 2024
ab73df0
Minor touchups
malhotrashivam Mar 5, 2024
1cb8b81
Added fix and tests for big decimals
malhotrashivam Mar 6, 2024
20e8204
Updated a comment
malhotrashivam Mar 6, 2024
9892d14
Review with Ryan part 1
malhotrashivam Mar 7, 2024
1d98927
Review with Ryan part 2
malhotrashivam Mar 11, 2024
5018feb
Minor touchups
malhotrashivam Mar 11, 2024
a529fc2
Fixed failing tests
malhotrashivam Mar 15, 2024
48906dd
Merge branch 'main' into sm-pq-metadata
malhotrashivam Mar 15, 2024
819a1b9
Added python APIs and improved comments
malhotrashivam Mar 16, 2024
10b7e0d
Added more fixes for python
malhotrashivam Mar 19, 2024
9f7c55e
Review with Ryan and Chip
malhotrashivam Mar 21, 2024
b62abb7
Merge branch 'main' into sm-pq-metadata
malhotrashivam Mar 21, 2024
0d7c62e
Review with Chip part 2
malhotrashivam Mar 22, 2024
5a3de8e
Review with Chip and Jianfeng Part 3
malhotrashivam Mar 22, 2024
f68551f
Review with Chip and Jianfeng continued
malhotrashivam Mar 22, 2024
b11ebd1
Added new APIs for managing indexes
malhotrashivam Mar 25, 2024
3bdc92b
Trigger CI jobs
malhotrashivam Mar 25, 2024
3dd6097
Review with Ryan
malhotrashivam Mar 26, 2024
d8bc0a5
Added python support for writing indexes
malhotrashivam Mar 27, 2024
f700883
Reordered comments
malhotrashivam Mar 27, 2024
6246289
Added more details to python comments
malhotrashivam Mar 27, 2024
fa2b0c5
Moved from list to sequence
malhotrashivam Mar 27, 2024
7909ea0
Added fixes and tests for windows
malhotrashivam Apr 1, 2024
@@ -12,14 +12,11 @@
import org.jetbrains.annotations.Nullable;

import java.net.URI;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static io.deephaven.parquet.base.ParquetFileReader.FILE_URI_SCHEME;

final class RowGroupReaderImpl implements RowGroupReader {
private final RowGroup rowGroup;
private final SeekableChannelsProvider channelsProvider;
@@ -724,8 +724,7 @@ public Builder setGenerateMetadataFiles(final boolean generateMetadataFiles) {
* "PC=partition1/table-0.parquet", "PC=partition1/table-1.parquet", etc., where PC is a partitioning
* column.</li>
* <li>The token {@value #UUID_TOKEN} will be replaced with a random UUID. For example, a base name of
* "table-{uuid}" will result in files named like
* "table-8e8ab6b2-62f2-40d1-8191-1c5b70c5f330.parquet.parquet".</li>
* "table-{uuid}" will result in files named like "table-8e8ab6b2-62f2-40d1-8191-1c5b70c5f330.parquet".</li>
* <li>The token {@value #PARTITIONS_TOKEN} will be replaced with an underscore-delimited, concatenated string
* of partition values. For example, a base name of "{partitions}-table" will result in files like
* "PC1=partition1/PC2=partitionA/PC1=partition1_PC2=partitionA-table.parquet", where "PC1" and "PC2" are
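As a rough illustration of how these file-name tokens might be exercised through the Python `write_partitioned` wrapper touched later in this PR — the table contents and the partitioning column are illustrative assumptions, not taken from the change:

```python
from deephaven import empty_table
from deephaven import parquet

# Hypothetical table with a single partitioning-style column "PC".
source = empty_table(100).update(["PC = ii % 3", "Value = ii"])

# "{partitions}" expands to the underscore-delimited partition values and
# "{uuid}" to a random UUID, so files should land under each "PC=<value>/"
# directory with names like "PC=0-table-<uuid>.parquet".
parquet.write_partitioned(
    source.partition_by("PC"),
    destination_dir="/tmp/partitioned_output",
    base_name="{partitions}-table-{uuid}",
    generate_metadata_files=True,
)
```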
@@ -104,7 +104,7 @@ private static class ParquetFileMetadata {
}

/**
* Add parquet metadata for the provided parquet file the combined metadata file.
* Add parquet metadata for the provided parquet file to the combined metadata file.
*
* @param parquetFilePath The parquet file destination path
* @param metadata The parquet metadata
@@ -255,13 +255,7 @@ private static void mergeBlocksInto(final ParquetFileMetadata parquetFileMetadat

private static String getRelativePath(final String parquetFilePath, final Path metadataRootDirAbsPath) {
final Path parquetFileAbsPath = new File(parquetFilePath).getAbsoluteFile().toPath();
final String relativePath = metadataRootDirAbsPath.relativize(parquetFileAbsPath).toString();
// Remove leading slashes from the relative path
int pos = 0;
while (pos < relativePath.length() && relativePath.charAt(pos) == '/') {
pos++;
}
return relativePath.substring(pos);
return metadataRootDirAbsPath.relativize(parquetFileAbsPath).toString();
}

private void writeMetadataFile(final ParquetMetadata metadataFooter, final String outputPath) throws IOException {
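The `getRelativePath` simplification above leans on the fact that relativizing against the metadata root directory already yields a path with no leading separator when the parquet file sits underneath that root. A tiny Python sketch of the same relationship — only an illustration of the idea, not the Java `Path.relativize` API:

```python
import os

# Hypothetical layout: the combined "_metadata" file is written at the root,
# and each parquet file is recorded relative to that root.
metadata_root = "/data/output"
parquet_file = "/data/output/PC=partition1/table-0.parquet"

# The path stored in the combined metadata file has no leading separator.
print(os.path.relpath(parquet_file, metadata_root))
# -> PC=partition1/table-0.parquet  (on POSIX systems)
```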
@@ -91,8 +91,8 @@ static class IndexWritingInfo {
final File destFile) {
this.indexColumnNames = indexColumnNames;
this.parquetColumnNames = parquetColumnNames;
this.destFileForMetadata = destFileForMetadata;
this.destFile = destFile;
this.destFileForMetadata = destFileForMetadata.getAbsoluteFile();
this.destFile = destFile.getAbsoluteFile();
}
}

@@ -143,7 +143,7 @@ static void write(
try {
if (indexInfoList != null) {
cleanupFiles = new ArrayList<>(indexInfoList.size());
final Path destDirPath = Paths.get(destFilePath).getParent();
final Path destDirPath = new File(destFilePath).getAbsoluteFile().getParentFile().toPath();
for (final ParquetTableWriter.IndexWritingInfo info : indexInfoList) {
try (final SafeCloseable ignored = t.isRefreshing() ? LivenessScopeStack.open() : null) {
// This will retrieve an existing index if one exists, or create a new one if not
@@ -166,7 +166,6 @@ static void write(
.addColumnNameMapping(INDEX_ROW_SET_COLUMN_NAME, dataIndex.rowSetColumnName())
.build();
}
// We don't accumulate metadata from grouping files into the main metadata file
write(indexTable, indexTable.getDefinition(), writeInstructionsToUse,
info.destFile.getAbsolutePath(), info.destFileForMetadata.getAbsolutePath(),
Collections.emptyMap(), TableInfo.builder(), NullParquetMetadataFileWriter.INSTANCE,
@@ -614,7 +614,9 @@ public static void writeKeyValuePartitionedTable(@NotNull final Table sourceTabl
* Write a partitioned table to disk in parquet format with all the {@link PartitionedTable#keyColumnNames() key
* columns} as "key=value" format in a nested directory structure. To generate the partitioned table, users can call
* {@link Table#partitionBy(String...) partitionBy} on the required columns. The generated parquet files will have
* names of the format provided by {@link ParquetInstructions#baseNameForPartitionedParquetData()}.
* names of the format provided by {@link ParquetInstructions#baseNameForPartitionedParquetData()}. This method does
* not write any indexes as sidecar tables to disk. To write indexes, use
* {@link #writeKeyValuePartitionedTable(PartitionedTable, String, ParquetInstructions, String[][])}.
*
* @param partitionedTable The partitioned table to write
* @param destinationDir The path to destination root directory to store partitioned data in nested format.
@@ -658,7 +660,9 @@ public static void writeKeyValuePartitionedTable(@NotNull final PartitionedTable
* Write a partitioned table to disk in parquet format with all the {@link PartitionedTable#keyColumnNames() key
* columns} as "key=value" format in a nested directory structure. To generate the partitioned table, users can call
* {@link Table#partitionBy(String...) partitionBy} on the required columns. The generated parquet files will have
* names of the format provided by {@link ParquetInstructions#baseNameForPartitionedParquetData()}.
* names of the format provided by {@link ParquetInstructions#baseNameForPartitionedParquetData()}. This method does
* not write any indexes as sidecar tables to disk. To write indexes, use
* {@link #writeKeyValuePartitionedTable(PartitionedTable, TableDefinition, String, ParquetInstructions, String[][])}.
*
* @param partitionedTable The partitioned table to write
* @param definition table definition to use (instead of the one implied by the table itself)
@@ -798,27 +802,38 @@ private static void writeKeyValuePartitionedTableImpl(@NotNull final Partitioned
} else {
partitioningColumnsSchema = null;
}
final Map<String, Map<ParquetCacheTags, Object>> computedCache =
buildComputedCache(() -> sourceTable.orElseGet(partitionedTable::merge), leafDefinition);
final Table[] partitionedDataArray = partitionedData.toArray(Table[]::new);
try (final SafeCloseable ignored = LivenessScopeStack.open()) {
// TODO(deephaven-core#5292): Optimize creating index on constituent tables
addIndexesToTables(partitionedDataArray, indexColumnArr);
final Map<String, Map<ParquetCacheTags, Object>> computedCache =
buildComputedCache(() -> sourceTable.orElseGet(partitionedTable::merge), leafDefinition);
// Store hard reference to prevent indexes from being garbage collected
final List<DataIndex> dataIndexes = addIndexesToTables(partitionedDataArray, indexColumnArr);
writeParquetTablesImpl(partitionedDataArray, leafDefinition, writeInstructions,
destinations.toArray(File[]::new), indexColumnArr, partitioningColumnsSchema,
new File(destinationRoot), computedCache);
if (dataIndexes != null) {
dataIndexes.clear();
}
}
}

private static void addIndexesToTables(@NotNull final Table[] tables,
/**
* Add data indexes to provided tables, if not present, and return a list of hard references to the indexes.
*/
@Nullable
private static List<DataIndex> addIndexesToTables(@NotNull final Table[] tables,
@Nullable final String[][] indexColumnArr) {
if (indexColumnArr != null && indexColumnArr.length != 0) {
for (final Table table : tables) {
for (final String[] indexCols : indexColumnArr) {
DataIndexer.getOrCreateDataIndex(table, indexCols);
}
if (indexColumnArr == null || indexColumnArr.length == 0) {
return null;
}
final List<DataIndex> dataIndexes = new ArrayList<>(indexColumnArr.length * tables.length);
for (final Table table : tables) {
for (final String[] indexCols : indexColumnArr) {
dataIndexes.add(DataIndexer.getOrCreateDataIndex(table, indexCols));
}
}
return dataIndexes;
}

/**
67 changes: 48 additions & 19 deletions py/server/deephaven/parquet.py
@@ -220,6 +220,10 @@ def _j_file_array(paths: List[str]):
return jpy.array("java.io.File", [_JFile(el) for el in paths])


def _j_array_of_array_of_string(data_indexes: List[List[str]]):
return jpy.array("[Ljava.lang.String;", [jpy.array("java.lang.String", index_cols) for index_cols in data_indexes])


def delete(path: str) -> None:
""" Deletes a Parquet table on disk.

@@ -245,6 +249,7 @@ def write(
max_dictionary_size: Optional[int] = None,
target_page_size: Optional[int] = None,
generate_metadata_files: Optional[bool] = None,
index_columns: Optional[List[List[str]]] = None
) -> None:
""" Write a table to a Parquet file.

@@ -268,6 +273,11 @@
defaults to False. Generating these files can help speed up reading of partitioned parquet data because these
files contain metadata (including schema) about the entire dataset, which can be used to skip reading some
files.
index_columns (Optional[List[List[str]]]): Lists containing the column names for indexes to persist. The write
operation will store the index info as sidecar tables. By default, data indexes to write are determined by
those present on the source table. This argument is used to narrow the set of indexes to write, or to be
explicit about the expected set of indexes present on all sources. Indexes that are specified but missing
will be computed on demand.

Raises:
DHError
@@ -282,15 +292,18 @@
for_read=False,
generate_metadata_files=generate_metadata_files,
)

table_definition = None
if col_definitions is not None:
table_definition = _JTableDefinition.of([col.j_column_definition for col in col_definitions])
else:
table_definition = table._definition

if table_definition:
_JParquetTools.writeTable(table.j_table, path, table_definition, write_instructions)
if index_columns:
table_array = jpy.array("io.deephaven.engine.table.Table", [table.j_table])
index_columns_array = _j_array_of_array_of_string(index_columns)
_JParquetTools.writeParquetTables(table_array, table_definition, write_instructions,
_j_file_array([path]), index_columns_array)
else:
_JParquetTools.writeTable(table.j_table, _JFile(path), write_instructions)
_JParquetTools.writeTable(table.j_table, path, table_definition, write_instructions)
except Exception as e:
raise DHError(e, "failed to write to parquet data.") from e
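A hedged usage sketch of the new `index_columns` argument to `write`; the table, column names, and output path are made up for illustration:

```python
from deephaven import empty_table
from deephaven import parquet

# Hypothetical table with a column worth indexing.
orders = empty_table(1_000).update(["Sym = ii % 5", "Price = ii / 10.0"])

# Persist the table plus a sidecar data index on "Sym". Per the docstring
# above, an index that is requested but missing is computed on demand.
parquet.write(
    orders,
    "/tmp/orders.parquet",
    index_columns=[["Sym"]],
    generate_metadata_files=True,
)
```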

@@ -306,6 +319,7 @@ def write_partitioned(
target_page_size: Optional[int] = None,
base_name: Optional[str] = None,
generate_metadata_files: Optional[bool] = None,
index_columns: Optional[List[List[str]]] = None
) -> None:
""" Write table to disk in parquet format with the partitioning columns written as "key=value" format in a nested
directory structure. For example, for a partitioned column "date", we will have a directory structure like
@@ -344,6 +358,11 @@
defaults to False. Generating these files can help speed up reading of partitioned parquet data because these
files contain metadata (including schema) about the entire dataset, which can be used to skip reading some
files.
index_columns (Optional[List[List[str]]]): Lists containing the column names for indexes to persist. The write
operation will store the index info as sidecar tables. By default, data indexes to write are determined by
those present on the source table. This argument is used to narrow the set of indexes to write, or to be
explicit about the expected set of indexes present on all sources. Indexes that are specified but missing
will be computed on demand.

Raises:
DHError
@@ -364,11 +383,20 @@
if col_definitions is not None:
table_definition = _JTableDefinition.of([col.j_column_definition for col in col_definitions])

if table_definition:
_JParquetTools.writeKeyValuePartitionedTable(table.j_object, table_definition, destination_dir,
write_instructions)
if index_columns:
index_columns_array = _j_array_of_array_of_string(index_columns)
if table_definition:
_JParquetTools.writeKeyValuePartitionedTable(table.j_object, table_definition, destination_dir,
write_instructions, index_columns_array)
else:
_JParquetTools.writeKeyValuePartitionedTable(table.j_object, destination_dir, write_instructions,
index_columns_array)
else:
_JParquetTools.writeKeyValuePartitionedTable(table.j_object, destination_dir, write_instructions)
if table_definition:
_JParquetTools.writeKeyValuePartitionedTable(table.j_object, table_definition, destination_dir,
write_instructions)
else:
_JParquetTools.writeKeyValuePartitionedTable(table.j_object, destination_dir, write_instructions)
except Exception as e:
raise DHError(e, "failed to write to parquet data.") from e
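Similarly, a sketch of forwarding `index_columns` through `write_partitioned`; the partitioning setup here is an assumption for illustration:

```python
from deephaven import empty_table
from deephaven import parquet

trades = empty_table(1_000).update(["Region = ii % 4", "Sym = ii % 10", "Size = ii"])

# Partition on "Region" and request a data index on "Sym"; per the Java
# changes above, the index is added to each constituent before it is written
# out as a sidecar table alongside that constituent's parquet file.
parquet.write_partitioned(
    trades.partition_by("Region"),
    destination_dir="/tmp/trades",
    index_columns=[["Sym"]],
    generate_metadata_files=True,
)
```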

@@ -382,23 +410,19 @@ def batch_write(
max_dictionary_keys: Optional[int] = None,
max_dictionary_size: Optional[int] = None,
target_page_size: Optional[int] = None,
grouping_cols: Optional[List[str]] = None,
generate_metadata_files: Optional[bool] = None,
index_columns: Optional[List[List[str]]] = None
):
""" Writes tables to disk in parquet format to a supplied set of paths.

If you specify grouping columns, there must already be grouping information for those columns in the sources.
This can be accomplished with .groupBy(<grouping columns>).ungroup() or .sort(<grouping column>).

Note that either all the tables are written out successfully or none is.

Args:
tables (List[Table]): the source tables
paths (List[str]): the destinations paths. Any non existing directories in the paths provided are
paths (List[str]): the destination paths. Any non-existing directories in the paths provided are
created. If there is an error, any intermediate directories previously created are removed; note this makes
this method unsafe for concurrent use
col_definitions (Optional[List[Column]]): the column definitions to use for writing, instead of the definitions
implied by the table. Default is None, which means use the column definitions implied by the table
col_definitions (List[Column]): the column definitions to use for writing.
col_instructions (Optional[List[ColumnInstruction]]): instructions for customizations while writing
compression_codec_name (Optional[str]): the compression codec to use. Allowed values include "UNCOMPRESSED",
"SNAPPY", "GZIP", "LZO", "LZ4", "LZ4_RAW", "ZSTD", etc. If not specified, defaults to "SNAPPY".
@@ -407,11 +431,15 @@
max_dictionary_size (Optional[int]): the maximum number of bytes the writer should add to the dictionary before
switching to non-dictionary encoding, never evaluated for non-String columns, defaults to 2^20 (1,048,576)
target_page_size (Optional[int]): the target page size in bytes, if not specified, defaults to 2^20 bytes (1 MiB)
grouping_cols (Optional[List[str]]): the group column names
generate_metadata_files (Optional[bool]): whether to generate parquet _metadata and _common_metadata files,
defaults to False. Generating these files can help speed up reading of partitioned parquet data because these
files contain metadata (including schema) about the entire dataset, which can be used to skip reading some
files.
index_columns (Optional[List[List[str]]]): Lists containing the column names for indexes to persist. The write
operation will store the index info as sidecar tables. By default, data indexes to write are determined by
those present on the source table. This argument is used to narrow the set of indexes to write, or to be
explicit about the expected set of indexes present on all sources. Indexes that are specified but missing
will be computed on demand.

Raises:
DHError
@@ -429,9 +457,10 @@

table_definition = _JTableDefinition.of([col.j_column_definition for col in col_definitions])

if grouping_cols:
if index_columns:
index_columns_array = _j_array_of_array_of_string(index_columns)
_JParquetTools.writeParquetTables([t.j_table for t in tables], table_definition, write_instructions,
_j_file_array(paths), grouping_cols)
_j_file_array(paths), index_columns_array)
else:
_JParquetTools.writeTables([t.j_table for t in tables], table_definition,
_j_file_array(paths))
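Finally, a sketch of `batch_write` with `index_columns` taking over from the removed `grouping_cols`; the paths are made up, and obtaining the column definitions from `Table.columns` is an assumption:

```python
from deephaven import empty_table
from deephaven import parquet

t1 = empty_table(100).update(["Sym = ii % 5", "Price = ii / 10.0"])
t2 = empty_table(200).update(["Sym = ii % 5", "Price = ii / 20.0"])

# batch_write takes explicit column definitions; both tables share a schema,
# so reuse the definitions implied by the first one.
parquet.batch_write(
    [t1, t2],
    ["/tmp/batch/t1.parquet", "/tmp/batch/t2.parquet"],
    col_definitions=t1.columns,
    index_columns=[["Sym"]],
    generate_metadata_files=True,
)
```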