From 8f66aefaab5653e74a43377f413a8c4af070b7ed Mon Sep 17 00:00:00 2001 From: jingz-db Date: Fri, 24 Jan 2025 11:07:20 +0900 Subject: [PATCH] [SPARK-50908][SS][PYTHON] Disable flaky TTL test in `test_pandas_transform_with_state.py` ### What changes were proposed in this pull request? Disable the flaky TTL suite for TransformWithStateInPandas. ### Why are the changes needed? The suite is flaky on CI because for this test case, we want to test the scenario where one key expires while the other key is still in the state store. The tricky thing is that we don't have a Python equivalent of Scala's `StreamManualClock`, so we have to manually set a Thread.sleep() time duration such that the TTL expires after a certain time duration while the other key does not. As the batch duration varies a lot on CI, this suite is flaky. We decided to disable the suite until we find a holistic fix. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Tested on CI. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49634 from jingz-db/fix-ttl-suite. Authored-by: jingz-db Signed-off-by: Hyukjin Kwon --- .../sql/tests/pandas/test_pandas_transform_with_state.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/tests/pandas/test_pandas_transform_with_state.py b/python/pyspark/sql/tests/pandas/test_pandas_transform_with_state.py index fec2e5d0caa2e..9e2ff78e19934 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_transform_with_state.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_transform_with_state.py @@ -330,9 +330,8 @@ def check_results(batch_df, batch_id): SimpleTTLStatefulProcessor(), check_results, False, "processingTime" ) - @unittest.skipIf( - "COVERAGE_PROCESS_START" in os.environ, "Flaky with coverage enabled, skipping for now." 
- ) + # TODO SPARK-50908 holistic fix for TTL suite + @unittest.skip("test is flaky and it is only a timing issue, skipping until we can resolve") def test_value_state_ttl_expiration(self): def check_results(batch_df, batch_id): if batch_id == 0: