Skip to content

Commit

Permalink
small generalization
Browse files Browse the repository at this point in the history
  • Loading branch information
devinrsmith committed Jan 15, 2025
1 parent 554ada6 commit 964b2b6
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,12 @@
import io.deephaven.engine.table.impl.locations.TableKey;
import io.deephaven.parquet.table.ParquetInstructions;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.schema.MessageType;

import java.util.Optional;

/**
* A mapping between Deephaven column names and Parquet {@link ColumnDescriptor column descriptors}.
*
* TODO: describe better
* A resolver from Deephaven column names to Parquet paths.
*/
public interface ParquetColumnResolver {

Expand All @@ -24,12 +23,22 @@ interface Factory {
/**
* TODO: description
*
*
*
* @param tableKey the table key
* @param tableLocationKey the Parquet TLK
* @return the Parquet column resolver
*/
ParquetColumnResolver of(TableKey tableKey, ParquetTableLocationKey tableLocationKey);
}

Optional<ColumnDescriptor> of(String columnName);
/**
*
*
* @param columnName the column name
* @return the path to the leaf field in the schema
* @see ColumnDescriptor#getPath()
* @see MessageType#getColumnDescription(String[])
*/
Optional<String[]> of(String columnName);
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@ public static Builder builder() {
public abstract MessageType schema();

/**
* The map from Deephaven column name to {@link ColumnDescriptor}. The {@link #schema()} must contains the column
* descriptors.
* The map from Deephaven column name to {@link ColumnDescriptor}. The {@link #schema()} must contain each column
* descriptor.
*/
public abstract Map<String, ColumnDescriptor> mapping();

@Override
public final Optional<ColumnDescriptor> of(String columnName) {
return Optional.ofNullable(mapping().get(columnName));
public final Optional<String[]> of(String columnName) {
return Optional.ofNullable(mapping().get(columnName)).map(ColumnDescriptor::getPath);
}

public interface Builder {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,6 @@ protected ColumnLocation makeColumnLocation(@NotNull final String columnName) {
} else {
// empty list will result in exists=false
nameList = resolver.of(columnName)
.map(ColumnDescriptor::getPath)
.map(Arrays::asList)
.orElse(List.of());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,6 @@ public void ambiguousFields() {

@Test
public void ambiguousFieldsNested() {
final ParquetFieldIdColumnResolverFactory factory = ParquetFieldIdColumnResolverFactory.of(Map.of("Col1", 1));
// X (1), list, element (2)
// Y (2), list, element (1)
// Z (3)
Expand All @@ -158,7 +157,7 @@ public void ambiguousFieldsNested() {
// for example, an implementation could choose to consider the innermost (or outermost) field id for matching
// purposes.
try {
factory.of(schema);
ParquetFieldIdColumnResolverFactory.of(Map.of("Col1", 1)).of(schema);
failBecauseExceptionWasNotThrown(IllegalArgumentException.class);
} catch (IllegalArgumentException e) {
assertThat(e).hasMessageContaining("Col1 -> 1 has multiple paths [X, list, element], [Y, list, element]");
Expand Down

0 comments on commit 964b2b6

Please sign in to comment.