forked from deephaven/deephaven-core
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Port DH-11168: Indexing enhancements to Community. (deephaven#3851)
Data Indexing * Unifies APIs for working with data indexes * Expands persistent indexing support to include non-clustered and multi-column indexes in addition to the existing clustered, single-column index support (Parquet specific) * Replaces "grouping" support, which is deprecated and adapted to "indexing" support in the persistence layers to ensure backwards-compatibility (Parquet specific) * Adds new, user-facing APIs for accessing data indexes available from any Deephaven Table * Adds new, user-facing APIs for adding in-memory data indexes to any Deephaven Table * Adds support for refreshing data indexes on refreshing ("live") Deephaven Tables * Expands Deephaven Table operation support for multi-column data indexes to include naturalJoin, aj, sort, and the aggBy family of operations; where already supports this for independent, single-column "match" filters, and whereIn/whereNotIn have full support * Adds Deephaven Table operation support for refreshing data indexes to where, whereIn/whereNotIn, naturalJoin, aj, sort, and the aggBy family of operations * Enables many future optimizations
- Loading branch information
Showing
215 changed files
with
10,061 additions
and
5,648 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
io.deephaven.project.ProjectType=DOCKER_REGISTRY | ||
deephaven.registry.imageName=selenium/standalone-firefox:4.16.1-20231219 | ||
deephaven.registry.imageId=selenium/standalone-firefox@sha256:a405fe92b3ce5d7eb31a07e1f99be3d628fdc0e5bdc81febd8dc11786edef024 | ||
deephaven.registry.platform=linux/amd64 |
118 changes: 118 additions & 0 deletions
118
engine/api/src/main/java/io/deephaven/engine/table/BasicDataIndex.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
// | ||
// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending | ||
// | ||
package io.deephaven.engine.table; | ||
|
||
import io.deephaven.engine.liveness.LivenessReferent; | ||
import io.deephaven.engine.rowset.RowSet; | ||
import io.deephaven.util.annotations.FinalDefault; | ||
import org.jetbrains.annotations.NotNull; | ||
|
||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
/** | ||
* Implementations of BasicDataIndex provide a data index for a {@link Table}. The index is itself a {@link Table} with | ||
* columns corresponding to the indexed column(s) ("key" columns) and a column of {@link RowSet RowSets} that contain | ||
* the row keys for each unique combination of key values (that is, the "group" or "bucket"). The index itself is a | ||
* Table containing the key column(s) and the RowSets associated with each unique combination of values. Implementations | ||
* may be loaded from persistent storage or created at runtime, e.g. via aggregations. | ||
*/ | ||
public interface BasicDataIndex extends LivenessReferent { | ||
|
||
/** | ||
* Get a map from indexed {@link ColumnSource ColumnSources} to key column names for the index {@link #table() | ||
* table}. This map must be ordered in the same order presented by {@link #keyColumnNames()} and used for lookup | ||
* keys. | ||
* | ||
* @return A map designating the key column names for each indexed {@link ColumnSource} | ||
*/ | ||
@NotNull | ||
Map<ColumnSource<?>, String> keyColumnNamesByIndexedColumn(); | ||
|
||
/** | ||
* Get a list of the key column names for the index {@link #table() table}. | ||
* | ||
* @return The key column names | ||
*/ | ||
@NotNull | ||
List<String> keyColumnNames(); | ||
|
||
/** | ||
* Get the {@link RowSet} column name for the index {@link #table() table}. | ||
* | ||
* @return The {@link RowSet} column name | ||
*/ | ||
@NotNull | ||
String rowSetColumnName(); | ||
|
||
/** | ||
* Get the key {@link ColumnSource ColumnSources} of the index {@link #table() table}. | ||
* | ||
* @return An array of the key {@link ColumnSource ColumnSources}, to be owned by the caller | ||
*/ | ||
@FinalDefault | ||
@NotNull | ||
default ColumnSource<?>[] keyColumns() { | ||
final Table indexTable = table(); | ||
return keyColumnNames().stream() | ||
.map(indexTable::getColumnSource) | ||
.toArray(ColumnSource[]::new); | ||
} | ||
|
||
/** | ||
* Get the key {@link ColumnSource ColumnSources} of the index {@link #table() table} in the relative order of | ||
* {@code indexedColumnSources}. | ||
* | ||
* @param indexedColumnSources The indexed {@link ColumnSource ColumnSources} in the desired order; must match the | ||
* keys of {@link #keyColumnNamesByIndexedColumn()} | ||
* @return An array of the key {@link ColumnSource ColumnSources} in the specified order, to be owned by the caller | ||
*/ | ||
@FinalDefault | ||
@NotNull | ||
default ColumnSource<?>[] keyColumns(@NotNull final ColumnSource<?>[] indexedColumnSources) { | ||
final Table indexTable = table(); | ||
final Map<ColumnSource<?>, String> keyColumnNamesByIndexedColumn = keyColumnNamesByIndexedColumn(); | ||
// Verify that the provided columns match the indexed columns. | ||
if (keyColumnNamesByIndexedColumn.size() != indexedColumnSources.length | ||
|| !keyColumnNamesByIndexedColumn.keySet().containsAll(Arrays.asList(indexedColumnSources))) { | ||
throw new IllegalArgumentException(String.format( | ||
"The provided columns %s do not match the index's indexed columns %s", | ||
Arrays.toString(indexedColumnSources), | ||
keyColumnNamesByIndexedColumn.keySet())); | ||
} | ||
return Arrays.stream(indexedColumnSources) | ||
.map(keyColumnNamesByIndexedColumn::get) | ||
.map(indexTable::getColumnSource) | ||
.toArray(ColumnSource[]::new); | ||
} | ||
|
||
/** | ||
* Get the {@link RowSet} {@link ColumnSource} of the index {@link #table() table}. | ||
* | ||
* @return The {@link RowSet} {@link ColumnSource} | ||
*/ | ||
@FinalDefault | ||
@NotNull | ||
default ColumnSource<RowSet> rowSetColumn() { | ||
return table().getColumnSource(rowSetColumnName(), RowSet.class); | ||
} | ||
|
||
/** | ||
* Get the {@link Table} backing this data index. | ||
* | ||
* @return The {@link Table} | ||
*/ | ||
@NotNull | ||
Table table(); | ||
|
||
/** | ||
* Whether the index {@link #table()} {@link Table#isRefreshing() is refreshing}. Some transformations will force | ||
* the index to become static even when the source table is refreshing. | ||
* | ||
* @return {@code true} if the index {@link #table()} {@link Table#isRefreshing() is refreshing}, {@code false} | ||
* otherwise | ||
*/ | ||
boolean isRefreshing(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.