From 61e7567172959099d75386453d36d9482992de96 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 24 Jun 2024 20:12:05 -0700 Subject: [PATCH 1/3] Add segfaulting test. --- cpp/tests/join/conditional_join_tests.cu | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cpp/tests/join/conditional_join_tests.cu b/cpp/tests/join/conditional_join_tests.cu index 79968bcd7f4..d11bf1bbc74 100644 --- a/cpp/tests/join/conditional_join_tests.cu +++ b/cpp/tests/join/conditional_join_tests.cu @@ -889,3 +889,13 @@ TYPED_TEST(ConditionalLeftAntiJoinTest, TestCompareRandomToHashNulls) auto [left, right] = gen_random_nullable_repeated_columns(); this->compare_to_hash_join_nulls({left}, {right}); }; + +TYPED_TEST(ConditionalInnerJoinTest, TestLeftColumnIsEmpty) +{ + this->test({{}}, {{0}}, left_zero_eq_right_zero, {{}}); +}; + +TYPED_TEST(ConditionalInnerJoinTest, TestRightColumnIsEmpty) +{ + this->test({{0}}, {{}}, left_zero_eq_right_zero, {{}}); +}; From 6122c8b22e223cb2eb28b9e3bfdc27f51d09e90f Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 25 Jun 2024 15:10:46 -0700 Subject: [PATCH 2/3] Fix bug in conditional left anti-join when right side has zero rows, and improve tests. --- cpp/src/join/conditional_join.cu | 7 +- cpp/tests/join/conditional_join_tests.cu | 102 +++++++++++++++-------- 2 files changed, 69 insertions(+), 40 deletions(-) diff --git a/cpp/src/join/conditional_join.cu b/cpp/src/join/conditional_join.cu index f02dee5f7f5..6f05f3009c9 100644 --- a/cpp/src/join/conditional_join.cu +++ b/cpp/src/join/conditional_join.cu @@ -49,7 +49,8 @@ std::unique_ptr> conditional_join_anti_semi( if (right.num_rows() == 0) { switch (join_type) { case join_kind::LEFT_ANTI_JOIN: - return std::make_unique>(left.num_rows(), stream, mr); + return get_trivial_left_join_indices(left, stream, rmm::mr::get_current_device_resource()) + .first; case join_kind::LEFT_SEMI_JOIN: return std::make_unique>(0, stream, mr); default: CUDF_FAIL("Invalid join kind."); break; @@ -96,10 +97,6 @@ std::unique_ptr> conditional_join_anti_semi( join_size = size.value(stream); } - if (left.num_rows() == 0) { - return std::make_unique>(0, stream, mr); - } - rmm::device_scalar write_index(0, stream); auto left_indices = std::make_unique>(join_size, stream, mr); diff --git a/cpp/tests/join/conditional_join_tests.cu b/cpp/tests/join/conditional_join_tests.cu index d11bf1bbc74..7ab4a2ea465 100644 --- a/cpp/tests/join/conditional_join_tests.cu +++ b/cpp/tests/join/conditional_join_tests.cu @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -222,21 +223,25 @@ struct ConditionalJoinPairReturnTest : public ConditionalJoinTest { std::vector> expected_outputs) { auto result_size = this->join_size(left, right, predicate); - EXPECT_TRUE(result_size == expected_outputs.size()); - - auto result = this->join(left, right, predicate); - std::vector> result_pairs; - for (size_t i = 0; i < result.first->size(); ++i) { - // Note: Not trying to be terribly efficient here since these tests are - // small, otherwise a batch copy to host before constructing the tuples - // would be important. - result_pairs.push_back({result.first->element(i, cudf::get_default_stream()), - result.second->element(i, cudf::get_default_stream())}); - } + EXPECT_EQ(result_size, expected_outputs.size()); + + auto result = this->join(left, right, predicate); + auto lhs_result = cudf::detail::make_std_vector_sync(*result.first, cudf::get_default_stream()); + auto rhs_result = + cudf::detail::make_std_vector_sync(*result.second, cudf::get_default_stream()); + std::vector> result_pairs(lhs_result.size()); + std::transform(lhs_result.begin(), + lhs_result.end(), + rhs_result.begin(), + result_pairs.begin(), + [](cudf::size_type lhs, cudf::size_type rhs) { + return std::pair{lhs, rhs}; + }); std::sort(result_pairs.begin(), result_pairs.end()); std::sort(expected_outputs.begin(), expected_outputs.end()); - EXPECT_TRUE(std::equal(expected_outputs.begin(), expected_outputs.end(), result_pairs.begin())); + EXPECT_TRUE(std::equal( + expected_outputs.begin(), expected_outputs.end(), result_pairs.begin(), result_pairs.end())); } /* @@ -411,6 +416,11 @@ TYPED_TEST(ConditionalInnerJoinTest, TestOneColumnLeftEmpty) this->test({{}}, {{3, 4, 5}}, left_zero_eq_right_zero, {}); }; +TYPED_TEST(ConditionalInnerJoinTest, TestOneColumnRightEmpty) +{ + this->test({{3, 4, 5}}, {{}}, left_zero_eq_right_zero, {}); +}; + TYPED_TEST(ConditionalInnerJoinTest, TestOneColumnTwoRowAllEqual) { this->test({{0, 1}}, {{0, 0}}, left_zero_eq_right_zero, {{0, 0}, {0, 1}}); @@ -600,6 +610,14 @@ TYPED_TEST(ConditionalLeftJoinTest, TestOneColumnLeftEmpty) this->test({{}}, {{3, 4, 5}}, left_zero_eq_right_zero, {}); }; +TYPED_TEST(ConditionalLeftJoinTest, TestOneColumnRightEmpty) +{ + this->test({{3, 4, 5}}, + {{}}, + left_zero_eq_right_zero, + {{0, JoinNoneValue}, {1, JoinNoneValue}, {2, JoinNoneValue}}); +}; + TYPED_TEST(ConditionalLeftJoinTest, TestCompareRandomToHash) { auto [left, right] = gen_random_repeated_columns(); @@ -666,6 +684,14 @@ TYPED_TEST(ConditionalFullJoinTest, TestOneColumnLeftEmpty) {{JoinNoneValue, 0}, {JoinNoneValue, 1}, {JoinNoneValue, 2}}); }; +TYPED_TEST(ConditionalFullJoinTest, TestOneColumnRightEmpty) +{ + this->test({{3, 4, 5}}, + {{}}, + left_zero_eq_right_zero, + {{0, JoinNoneValue}, {1, JoinNoneValue}, {2, JoinNoneValue}}); +}; + TYPED_TEST(ConditionalFullJoinTest, TestTwoColumnThreeRowSomeEqual) { this->test({{0, 1, 2}, {10, 20, 30}}, @@ -705,20 +731,16 @@ struct ConditionalJoinSingleReturnTest : public ConditionalJoinTest { auto [left_wrappers, right_wrappers, left_columns, right_columns, left, right] = this->parse_input(left_data, right_data); auto result_size = this->join_size(left, right, predicate); - EXPECT_TRUE(result_size == expected_outputs.size()); - - auto result = this->join(left, right, predicate); - std::vector resulting_indices; - for (size_t i = 0; i < result->size(); ++i) { - // Note: Not trying to be terribly efficient here since these tests are - // small, otherwise a batch copy to host before constructing the tuples - // would be important. - resulting_indices.push_back(result->element(i, cudf::get_default_stream())); - } - std::sort(resulting_indices.begin(), resulting_indices.end()); + EXPECT_EQ(result_size, expected_outputs.size()); + + auto result = this->join(left, right, predicate); + auto result_indices = cudf::detail::make_std_vector_sync(*result, cudf::get_default_stream()); + std::sort(result_indices.begin(), result_indices.end()); std::sort(expected_outputs.begin(), expected_outputs.end()); - EXPECT_TRUE( - std::equal(resulting_indices.begin(), resulting_indices.end(), expected_outputs.begin())); + EXPECT_TRUE(std::equal(result_indices.begin(), + result_indices.end(), + expected_outputs.begin(), + expected_outputs.end())); } void _compare_to_hash_join(std::unique_ptr> const& result, @@ -826,6 +848,16 @@ struct ConditionalLeftSemiJoinTest : public ConditionalJoinSingleReturnTest { TYPED_TEST_SUITE(ConditionalLeftSemiJoinTest, cudf::test::IntegralTypesNotBool); +TYPED_TEST(ConditionalLeftSemiJoinTest, TestOneColumnLeftEmpty) +{ + this->test({{}}, {{3, 4, 5}}, left_zero_eq_right_zero, {}); +}; + +TYPED_TEST(ConditionalLeftSemiJoinTest, TestOneColumnRightEmpty) +{ + this->test({{3, 4, 5}}, {{}}, left_zero_eq_right_zero, {}); +}; + TYPED_TEST(ConditionalLeftSemiJoinTest, TestTwoColumnThreeRowSomeEqual) { this->test({{0, 1, 2}, {10, 20, 30}}, {{0, 1, 3}, {30, 40, 50}}, left_zero_eq_right_zero, {0, 1}); @@ -873,6 +905,16 @@ struct ConditionalLeftAntiJoinTest : public ConditionalJoinSingleReturnTest { TYPED_TEST_SUITE(ConditionalLeftAntiJoinTest, cudf::test::IntegralTypesNotBool); +TYPED_TEST(ConditionalLeftAntiJoinTest, TestOneColumnLeftEmpty) +{ + this->test({{}}, {{3, 4, 5}}, left_zero_eq_right_zero, {}); +}; + +TYPED_TEST(ConditionalLeftAntiJoinTest, TestOneColumnRightEmpty) +{ + this->test({{3, 4, 5}}, {{}}, left_zero_eq_right_zero, {0, 1, 2}); +}; + TYPED_TEST(ConditionalLeftAntiJoinTest, TestTwoColumnThreeRowSomeEqual) { this->test({{0, 1, 2}, {10, 20, 30}}, {{0, 1, 3}, {30, 40, 50}}, left_zero_eq_right_zero, {2}); @@ -889,13 +931,3 @@ TYPED_TEST(ConditionalLeftAntiJoinTest, TestCompareRandomToHashNulls) auto [left, right] = gen_random_nullable_repeated_columns(); this->compare_to_hash_join_nulls({left}, {right}); }; - -TYPED_TEST(ConditionalInnerJoinTest, TestLeftColumnIsEmpty) -{ - this->test({{}}, {{0}}, left_zero_eq_right_zero, {{}}); -}; - -TYPED_TEST(ConditionalInnerJoinTest, TestRightColumnIsEmpty) -{ - this->test({{0}}, {{}}, left_zero_eq_right_zero, {{}}); -}; From 1a0f66e0ffb52436caf2595ae0a2c140c4f186f1 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 25 Jun 2024 15:34:03 -0700 Subject: [PATCH 3/3] Use provided mr. --- cpp/src/join/conditional_join.cu | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/cpp/src/join/conditional_join.cu b/cpp/src/join/conditional_join.cu index 6f05f3009c9..97a06d5a923 100644 --- a/cpp/src/join/conditional_join.cu +++ b/cpp/src/join/conditional_join.cu @@ -48,9 +48,7 @@ std::unique_ptr> conditional_join_anti_semi( { if (right.num_rows() == 0) { switch (join_type) { - case join_kind::LEFT_ANTI_JOIN: - return get_trivial_left_join_indices(left, stream, rmm::mr::get_current_device_resource()) - .first; + case join_kind::LEFT_ANTI_JOIN: return get_trivial_left_join_indices(left, stream, mr).first; case join_kind::LEFT_SEMI_JOIN: return std::make_unique>(0, stream, mr); default: CUDF_FAIL("Invalid join kind."); break; @@ -146,8 +144,7 @@ conditional_join(table_view const& left, // with a corresponding NULL from the right. case join_kind::LEFT_JOIN: case join_kind::LEFT_ANTI_JOIN: - case join_kind::FULL_JOIN: - return get_trivial_left_join_indices(left, stream, rmm::mr::get_current_device_resource()); + case join_kind::FULL_JOIN: return get_trivial_left_join_indices(left, stream, mr); // Inner and left semi joins return empty output because no matches can exist. case join_kind::INNER_JOIN: case join_kind::LEFT_SEMI_JOIN: @@ -166,8 +163,7 @@ conditional_join(table_view const& left, std::make_unique>(0, stream, mr)); // Full joins need to return the trivial complement. case join_kind::FULL_JOIN: { - auto ret_flipped = - get_trivial_left_join_indices(right, stream, rmm::mr::get_current_device_resource()); + auto ret_flipped = get_trivial_left_join_indices(right, stream, mr); return std::pair(std::move(ret_flipped.second), std::move(ret_flipped.first)); } default: CUDF_FAIL("Invalid join kind."); break;