[SPARK-46517][PS][TESTS][FOLLOWUPS] Reorganize IndexingTest: Move t…

…o `pyspark.pandas.tests.indexes.*` and add the parity test

### What changes were proposed in this pull request?
This is the last PR to reorganize `IndexingTest`: (1) move it to `pyspark.pandas.tests.indexes.*`; (2) add the missing parity test.

### Why are the changes needed?
Test parity and testing parallelism.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
CI.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes #44520 from zhengruifeng/ps_test_xxx.

Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
apache · Dec 28, 2023 · 0de70c4 · 0de70c4
1 parent 6fcc268
commit 0de70c4
Show file tree

Hide file tree

Showing 3 changed files with 60 additions and 5 deletions.
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
@@ -816,6 +816,7 @@ def __hash__(self):
         "pyspark.pandas.tests.indexes.test_datetime_round",
         "pyspark.pandas.tests.indexes.test_align",
         "pyspark.pandas.tests.indexes.test_indexing",
+        "pyspark.pandas.tests.indexes.test_indexing_adv",
         "pyspark.pandas.tests.indexes.test_indexing_basic",
         "pyspark.pandas.tests.indexes.test_indexing_iloc",
         "pyspark.pandas.tests.indexes.test_indexing_loc",
@@ -879,7 +880,6 @@ def __hash__(self):
         "pyspark.pandas.tests.groupby.test_stat_func",
         "pyspark.pandas.tests.groupby.test_stat_prod",
         "pyspark.pandas.tests.groupby.test_value_counts",
-        "pyspark.pandas.tests.test_indexing",
         "pyspark.pandas.tests.diff_frames_ops.test_align",
         "pyspark.pandas.tests.diff_frames_ops.test_arithmetic",
         "pyspark.pandas.tests.diff_frames_ops.test_arithmetic_ext",
@@ -1093,6 +1093,7 @@ def __hash__(self):
         "pyspark.pandas.tests.connect.indexes.test_parity_map",
         "pyspark.pandas.tests.connect.indexes.test_parity_align",
         "pyspark.pandas.tests.connect.indexes.test_parity_indexing",
+        "pyspark.pandas.tests.connect.indexes.test_parity_indexing_adv",
         "pyspark.pandas.tests.connect.indexes.test_parity_indexing_basic",
         "pyspark.pandas.tests.connect.indexes.test_parity_indexing_iloc",
         "pyspark.pandas.tests.connect.indexes.test_parity_indexing_loc",

diff --git a/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing_adv.py b/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing_adv.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.indexes.test_indexing_adv import IndexingAdvMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+
+
+class IndexingAdvParityTests(
+    IndexingAdvMixin,
+    PandasOnSparkTestUtils,
+    ReusedConnectTestCase,
+):
+    """Parity test: runs the IndexingAdvMixin cases on top of ReusedConnectTestCase."""
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.connect.indexes.test_parity_indexing_adv import *  # noqa: F401
+
+    try:
+        import xmlrunner  # type: ignore[import]
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/test_indexing.py → ...pandas/tests/indexes/test_indexing_adv.py b/python/pyspark/pandas/tests/test_indexing.py → ...pandas/tests/indexes/test_indexing_adv.py
@@ -22,11 +22,12 @@
 import pandas as pd
 
 from pyspark import pandas as ps
-from pyspark.pandas.exceptions import SparkPandasIndexingError, SparkPandasNotImplementedError
-from pyspark.testing.pandasutils import ComparisonTestBase, compare_both
+from pyspark.pandas.exceptions import SparkPandasNotImplementedError
+from pyspark.testing.pandasutils import PandasOnSparkTestCase, compare_both
+from pyspark.testing.sqlutils import SQLTestUtils
 
 
-class IndexingTest(ComparisonTestBase):
+class IndexingAdvMixin:
     @property
     def pdf(self):
         return pd.DataFrame(
@@ -41,6 +42,10 @@ def pdf2(self):
             index=[0, 1, 3, 5, 6, 8, 9, 9, 9],
         )
 
+    @property
+    def psdf(self):
+        return ps.from_pandas(self.pdf)
+
     @property
     def psdf2(self):
         return ps.from_pandas(self.pdf2)
@@ -380,8 +385,16 @@ def test_index_operator_int(self):
             psdf.iloc[[1, 1]]
 
 
+class IndexingAdvTests(
+    IndexingAdvMixin,
+    PandasOnSparkTestCase,
+    SQLTestUtils,
+):
+    """Runs the IndexingAdvMixin cases on PandasOnSparkTestCase with SQLTestUtils."""
+
+
 if __name__ == "__main__":
-    from pyspark.pandas.tests.test_indexing import *  # noqa: F401
+    from pyspark.pandas.tests.indexes.test_indexing_adv import *  # noqa: F401
 
     try:
         import xmlrunner