From b5e2fd2b91983e49376abd7344925d06d5c48ad3 Mon Sep 17 00:00:00 2001 From: Stamatis Zampetakis Date: Wed, 23 Oct 2024 13:01:57 +0300 Subject: [PATCH 1/3] [CALCITE-6640] RelMdUniqueKeys grows exponentially when key columns are repeated in projections Avoid generating non-minimal/redundant key sets when computing the unique keys for columns that are repeated in the output. --- .../calcite/rel/metadata/RelMdUniqueKeys.java | 27 ++++++------------- .../apache/calcite/test/RelMetadataTest.java | 15 +++++++++-- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdUniqueKeys.java b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdUniqueKeys.java index 31707997ea5b..8745d0606466 100644 --- a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdUniqueKeys.java +++ b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdUniqueKeys.java @@ -46,7 +46,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMultimap; import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Maps; +import com.google.common.collect.Multimap; import com.google.common.collect.Sets; import org.checkerframework.checker.nullness.qual.Nullable; @@ -54,7 +54,6 @@ import java.util.ArrayList; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -63,8 +62,6 @@ import static org.apache.calcite.rel.metadata.RelMdColumnUniqueness.PASSTHROUGH_AGGREGATIONS; import static org.apache.calcite.rel.metadata.RelMdColumnUniqueness.getConstantColumnSet; -import static java.util.Objects.requireNonNull; - /** * RelMdUniqueKeys supplies a default implementation of * {@link RelMetadataQuery#getUniqueKeys} for the standard logical algebra. @@ -172,8 +169,7 @@ private static Set getProjectUniqueKeys(SingleRel rel, RelMetad return ImmutableSet.of(); } - Map mapInToOutPos = - Maps.transformValues(inToOutPosBuilder.build().asMap(), ImmutableBitSet::of); + Multimap mapInToOutPos = inToOutPosBuilder.build(); ImmutableSet.Builder resultBuilder = ImmutableSet.builder(); // Now add to the projUniqueKeySet the child keys that are fully @@ -184,19 +180,12 @@ private static Set getProjectUniqueKeys(SingleRel rel, RelMetad continue; } // colMask is mapped to output project, however, the column can be mapped more than once: - // select id, id, id, unique2, unique2 - // the resulting unique keys would be {{0},{3}}, {{0},{4}}, {{0},{1},{4}}, ... - - Iterable> product = - Linq4j.product( - Util.transform(colMask, in -> - Util.filter( - requireNonNull(mapInToOutPos.get(in), - () -> "no entry for column " + in - + " in mapInToOutPos: " + mapInToOutPos).powerSet(), - bs -> !bs.isEmpty()))); - - resultBuilder.addAll(Util.transform(product, ImmutableBitSet::union)); + // select key1, key1, val1, val2, key2 from ... + // the resulting unique keys would be {{0},{4}}, {{1},{4}} + + Iterable> product = Linq4j.product(Util.transform(colMask, mapInToOutPos::get)); + + resultBuilder.addAll(Util.transform(product, ImmutableBitSet::of)); } return resultBuilder.build(); } diff --git a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java index d654612851bf..61937ac10f0e 100644 --- a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java +++ b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java @@ -1448,6 +1448,17 @@ private void checkColumnUniquenessForFilterWithConstantColumns(String sql) { .withCatalogReaderFactory(factory) .assertThatUniqueKeysAre(); + sql("select key1, key1, key2, value1 from s.composite_keys_table") + .withCatalogReaderFactory(factory) + .assertThatUniqueKeysAre(bitSetOf(0, 2), bitSetOf(1, 2)); + sql("select key1, key2, key2, value1 from s.composite_keys_table") + .withCatalogReaderFactory(factory) + .assertThatUniqueKeysAre(bitSetOf(0, 1), bitSetOf(0, 2)); + + sql("select key1, key1, key2, key2, value1 from s.composite_keys_table") + .withCatalogReaderFactory(factory) + .assertThatUniqueKeysAre(bitSetOf(0, 2), bitSetOf(0, 3), bitSetOf(1, 2), bitSetOf(1, 3)); + // no column of composite keys sql("select value1 from s.composite_keys_table") .withCatalogReaderFactory(factory) @@ -1640,7 +1651,7 @@ private static ImmutableBitSet bitSetOf(int... bits) { @Test void calcMultipleColumnsAreUniqueCalc() { sql("select empno, empno from emp") .convertingProjectAsCalc() - .assertThatUniqueKeysAre(bitSetOf(0), bitSetOf(1), bitSetOf(0, 1)); + .assertThatUniqueKeysAre(bitSetOf(0), bitSetOf(1)); } @Test void calcMultipleColumnsAreUniqueCalc2() { @@ -1655,7 +1666,7 @@ private static ImmutableBitSet bitSetOf(int... bits) { + " from emp a1 join emp a2\n" + " on (a1.empno=a2.empno)") .convertingProjectAsCalc() - .assertThatUniqueKeysAre(bitSetOf(0), bitSetOf(1), bitSetOf(1, 2), bitSetOf(2)); + .assertThatUniqueKeysAre(bitSetOf(0), bitSetOf(1), bitSetOf(2)); } @Test void calcColumnsAreNonUniqueCalc() { From 66ab17c9e0d8ddf7466056e660fb2ffe2c183b99 Mon Sep 17 00:00:00 2001 From: Stamatis Zampetakis Date: Tue, 29 Oct 2024 15:44:37 +0200 Subject: [PATCH 2/3] Add new ImmutableBitSet#isMinimal API and check unique keys minimality in tests --- .../apache/calcite/util/ImmutableBitSet.java | 12 +++++++ .../calcite/util/ImmutableBitSetTest.java | 32 +++++++++++++++++++ .../calcite/test/RelMetadataFixture.java | 1 + 3 files changed, 45 insertions(+) diff --git a/core/src/main/java/org/apache/calcite/util/ImmutableBitSet.java b/core/src/main/java/org/apache/calcite/util/ImmutableBitSet.java index cd688476f8f4..bf57ec02fca6 100644 --- a/core/src/main/java/org/apache/calcite/util/ImmutableBitSet.java +++ b/core/src/main/java/org/apache/calcite/util/ImmutableBitSet.java @@ -978,6 +978,18 @@ public static boolean allContain(Collection bitSets, int bit) { return true; } + /** + * Returns whether this is a minimal set with respect to the specified collection of bitSets. + */ + public boolean isMinimal(Collection bitSets) { + for (ImmutableBitSet other : bitSets) { + if (!this.equals(other) && this.contains(other)) { + return false; + } + } + return true; + } + /** Returns whether a given predicate evaluates to true for all bits in this * set. */ public boolean allMatch(IntPredicate predicate) { diff --git a/core/src/test/java/org/apache/calcite/util/ImmutableBitSetTest.java b/core/src/test/java/org/apache/calcite/util/ImmutableBitSetTest.java index 260af704c55b..10ea95025b31 100644 --- a/core/src/test/java/org/apache/calcite/util/ImmutableBitSetTest.java +++ b/core/src/test/java/org/apache/calcite/util/ImmutableBitSetTest.java @@ -668,6 +668,38 @@ private List getSortedList() { assertFalse(ImmutableBitSet.allContain(collection2, 4)); } + @Test void testIsMinimal() { + ImmutableBitSet se = ImmutableBitSet.of(); + ImmutableBitSet s0 = ImmutableBitSet.of(0); + ImmutableBitSet s1 = ImmutableBitSet.of(1); + ImmutableBitSet s01 = ImmutableBitSet.of(0, 1); + + assertTrue(se.isMinimal(Collections.singletonList(se))); + assertTrue(se.isMinimal(Collections.singletonList(s0))); + assertTrue(se.isMinimal(Collections.singletonList(s1))); + assertTrue(se.isMinimal(Collections.singletonList(s01))); + + assertFalse(s0.isMinimal(Collections.singletonList(se))); + assertTrue(s0.isMinimal(Collections.singletonList(s0))); + assertTrue(s0.isMinimal(Collections.singletonList(s1))); + assertTrue(s0.isMinimal(Collections.singletonList(s01))); + + assertFalse(s1.isMinimal(Collections.singletonList(se))); + assertTrue(s1.isMinimal(Collections.singletonList(s0))); + assertTrue(s1.isMinimal(Collections.singletonList(s1))); + assertTrue(s1.isMinimal(Collections.singletonList(s01))); + + assertFalse(s01.isMinimal(Collections.singletonList(se))); + assertFalse(s01.isMinimal(Collections.singletonList(s0))); + assertFalse(s01.isMinimal(Collections.singletonList(s1))); + assertTrue(s01.isMinimal(Collections.singletonList(s01))); + + assertTrue(se.isMinimal(Arrays.asList(s0, s1))); + assertTrue(s0.isMinimal(Arrays.asList(s0, s1))); + assertTrue(s1.isMinimal(Arrays.asList(s0, s1))); + assertFalse(s01.isMinimal(Arrays.asList(s0, s1))); + } + /** * Test case for {@link ImmutableBitSet#anyMatch(IntPredicate)} * and {@link ImmutableBitSet#allMatch(IntPredicate)}. diff --git a/testkit/src/main/java/org/apache/calcite/test/RelMetadataFixture.java b/testkit/src/main/java/org/apache/calcite/test/RelMetadataFixture.java index 3dff04476295..eb5e060f6c7a 100644 --- a/testkit/src/main/java/org/apache/calcite/test/RelMetadataFixture.java +++ b/testkit/src/main/java/org/apache/calcite/test/RelMetadataFixture.java @@ -356,6 +356,7 @@ public RelMetadataFixture assertThatUniqueKeysAre( ImmutableSortedSet.copyOf(result), is(ImmutableSortedSet.copyOf(expectedUniqueKeys))); checkUniqueConsistent(rel); + assertThat(result.stream().allMatch(s -> s.isMinimal(result)), is(true)); return this; } From 1e3b09a02bf465320b327de9714ec30c12e29e65 Mon Sep 17 00:00:00 2001 From: Stamatis Zampetakis Date: Thu, 21 Nov 2024 11:28:45 +0100 Subject: [PATCH 3/3] Revert "Add new ImmutableBitSet#isMinimal API and check unique keys minimality in tests" The codebase has already "minimization" code in various other places (e.g., RelMdUniqueKeys#filterSupersets) and there is overlapping functionality with the new isMinimal API. Based on these findings, I don't think a new API is necessary thus I reverted the respective code changes from here. If we want to add a new public API its better to refactor/use the existing "minimization" logic. However, the refactoring will require some discussion on where to move the code and how we want the API to look like so it is out of the scope of this PR. --- .../apache/calcite/util/ImmutableBitSet.java | 12 ------- .../calcite/util/ImmutableBitSetTest.java | 32 ------------------- .../calcite/test/RelMetadataFixture.java | 1 - 3 files changed, 45 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/util/ImmutableBitSet.java b/core/src/main/java/org/apache/calcite/util/ImmutableBitSet.java index bf57ec02fca6..cd688476f8f4 100644 --- a/core/src/main/java/org/apache/calcite/util/ImmutableBitSet.java +++ b/core/src/main/java/org/apache/calcite/util/ImmutableBitSet.java @@ -978,18 +978,6 @@ public static boolean allContain(Collection bitSets, int bit) { return true; } - /** - * Returns whether this is a minimal set with respect to the specified collection of bitSets. - */ - public boolean isMinimal(Collection bitSets) { - for (ImmutableBitSet other : bitSets) { - if (!this.equals(other) && this.contains(other)) { - return false; - } - } - return true; - } - /** Returns whether a given predicate evaluates to true for all bits in this * set. */ public boolean allMatch(IntPredicate predicate) { diff --git a/core/src/test/java/org/apache/calcite/util/ImmutableBitSetTest.java b/core/src/test/java/org/apache/calcite/util/ImmutableBitSetTest.java index 10ea95025b31..260af704c55b 100644 --- a/core/src/test/java/org/apache/calcite/util/ImmutableBitSetTest.java +++ b/core/src/test/java/org/apache/calcite/util/ImmutableBitSetTest.java @@ -668,38 +668,6 @@ private List getSortedList() { assertFalse(ImmutableBitSet.allContain(collection2, 4)); } - @Test void testIsMinimal() { - ImmutableBitSet se = ImmutableBitSet.of(); - ImmutableBitSet s0 = ImmutableBitSet.of(0); - ImmutableBitSet s1 = ImmutableBitSet.of(1); - ImmutableBitSet s01 = ImmutableBitSet.of(0, 1); - - assertTrue(se.isMinimal(Collections.singletonList(se))); - assertTrue(se.isMinimal(Collections.singletonList(s0))); - assertTrue(se.isMinimal(Collections.singletonList(s1))); - assertTrue(se.isMinimal(Collections.singletonList(s01))); - - assertFalse(s0.isMinimal(Collections.singletonList(se))); - assertTrue(s0.isMinimal(Collections.singletonList(s0))); - assertTrue(s0.isMinimal(Collections.singletonList(s1))); - assertTrue(s0.isMinimal(Collections.singletonList(s01))); - - assertFalse(s1.isMinimal(Collections.singletonList(se))); - assertTrue(s1.isMinimal(Collections.singletonList(s0))); - assertTrue(s1.isMinimal(Collections.singletonList(s1))); - assertTrue(s1.isMinimal(Collections.singletonList(s01))); - - assertFalse(s01.isMinimal(Collections.singletonList(se))); - assertFalse(s01.isMinimal(Collections.singletonList(s0))); - assertFalse(s01.isMinimal(Collections.singletonList(s1))); - assertTrue(s01.isMinimal(Collections.singletonList(s01))); - - assertTrue(se.isMinimal(Arrays.asList(s0, s1))); - assertTrue(s0.isMinimal(Arrays.asList(s0, s1))); - assertTrue(s1.isMinimal(Arrays.asList(s0, s1))); - assertFalse(s01.isMinimal(Arrays.asList(s0, s1))); - } - /** * Test case for {@link ImmutableBitSet#anyMatch(IntPredicate)} * and {@link ImmutableBitSet#allMatch(IntPredicate)}. diff --git a/testkit/src/main/java/org/apache/calcite/test/RelMetadataFixture.java b/testkit/src/main/java/org/apache/calcite/test/RelMetadataFixture.java index eb5e060f6c7a..3dff04476295 100644 --- a/testkit/src/main/java/org/apache/calcite/test/RelMetadataFixture.java +++ b/testkit/src/main/java/org/apache/calcite/test/RelMetadataFixture.java @@ -356,7 +356,6 @@ public RelMetadataFixture assertThatUniqueKeysAre( ImmutableSortedSet.copyOf(result), is(ImmutableSortedSet.copyOf(expectedUniqueKeys))); checkUniqueConsistent(rel); - assertThat(result.stream().allMatch(s -> s.isMinimal(result)), is(true)); return this; }