From b29355f44de3e1691727d3209cf0e3a07f306c16 Mon Sep 17 00:00:00 2001
From: David Kunzmann <david.kunzmann@sonarsource.com>
Date: Mon, 9 Oct 2023 15:31:47 +0200
Subject: [PATCH] SONARPY-1514: Rule S6742 should raise issues on chains of 7
 or more operations. (#1600)

---
 .../checks/PandasChainInstructionCheck.java   |  2 +-
 .../checks/pandasChainInstructionCheck.py     | 30 +++++++++----------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/python-checks/src/main/java/org/sonar/python/checks/PandasChainInstructionCheck.java b/python-checks/src/main/java/org/sonar/python/checks/PandasChainInstructionCheck.java
index 5224460197..be1961e25e 100644
--- a/python-checks/src/main/java/org/sonar/python/checks/PandasChainInstructionCheck.java
+++ b/python-checks/src/main/java/org/sonar/python/checks/PandasChainInstructionCheck.java
@@ -43,7 +43,7 @@
 public class PandasChainInstructionCheck extends PythonSubscriptionCheck {
 
   private static final String MESSAGE = "Refactor this long chain of instructions with pandas.pipe";
-  private static final int MAX_CHAIN_LENGTH = 5;
+  private static final int MAX_CHAIN_LENGTH = 7;
 
   private static final String DATAFRAME_FQN = "pandas.core.frame.DataFrame";
 
diff --git a/python-checks/src/test/resources/checks/pandasChainInstructionCheck.py b/python-checks/src/test/resources/checks/pandasChainInstructionCheck.py
index d516f49077..452e76478a 100644
--- a/python-checks/src/test/resources/checks/pandasChainInstructionCheck.py
+++ b/python-checks/src/test/resources/checks/pandasChainInstructionCheck.py
@@ -4,30 +4,30 @@
 
 def non_compliant(df: pd.DataFrame, df2: DataFrame):
 
-    df2.set_index("name").T.filter(like='joe', axis=0)[1].mean().head()  # Noncompliant
-#   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    DataFrame().set_index("name").filter(like='joe', axis=0).groupby("team")["salary"].mean().head()  # Noncompliant {{Refactor this long chain of instructions with pandas.pipe}}
-#   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    df.set_index("name").filter(like='joe', axis=0).groupby("team")["salary"].mean().head()  # FN see SONARPY-1503
+    df2.set_index("name").T.filter(like='joe', axis=0)[1].add(10).mean().round().to_parquet()  # Noncompliant
+#   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    DataFrame().set_index("name").filter(like='joe', axis=0).groupby("team")["salary"].add(10).mean().round().to_parquet()  # Noncompliant {{Refactor this long chain of instructions with pandas.pipe}}
+#   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    df.set_index("name").filter(like='joe', axis=0).groupby("team")["salary"].add(10).mean().round().to_parquet()  # FN see SONARPY-1503
 
-    df2.set_index("name").filter(like='joe', axis=0).groupby("team")["salary"]["test"].mean().head()  # Noncompliant {{Refactor this long chain of instructions with pandas.pipe}}
-#   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-    pd.read_csv("some_csv.csv").filter(like='joe', axis=0).groupby("team")["salary"]["test"].mean().head() # Noncompliant
-#   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    df2.set_index("name").filter(like='joe', axis=0).groupby("team")["salary"]["test"].add(10).mean().round().to_parquet()  # Noncompliant {{Refactor this long chain of instructions with pandas.pipe}}
+#   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    pd.read_csv("some_csv.csv").filter(like='joe', axis=0).groupby("team")["salary"]["test"].add(10).mean().round().to_parquet()  # Noncompliant
+#   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 
 #   Here we do not raise an issue only because we do not support subscription with Name
 #   If support is added for such case we would encounter FPs when the subscription with Name is at the beginning of the chain 
-    pd.read_csv("some_csv.csv").filter(like='joe', axis=0).groupby("team")["salary"]["test"].axes[1].unique() # FN
+    pd.read_csv("some_csv.csv").filter(like='joe', axis=0).add(10).groupby("team")["salary"]["test"].axes[1].unique().to_json()  # FN
 
 #   Here we should not raise an issue as the chain is done mainly on an Index object which does not have a pipe method
-    pd.read_csv("some_csv.csv").axes[1].join(pd.Index([4, 5, 6])).repeat([1,2]).drop_duplicates().insert(1, 42)
+    pd.read_csv("some_csv.csv").axes[1].join(pd.Index([4, 5, 6])).T.repeat([1,2]).drop_duplicates().insert(1, 42).sort_values()
 
 def compliant(df: pd.DataFrame, my_function, something, df2: DataFrame):
 
-    df2.set_index("name").T.filter(like='joe', axis=0)[1].mean()
+    df2.set_index("name").T.filter(like='joe', axis=0)[1].add(10).mean().to_html()
 
-    (df2.set_index("name").T.filter(like='joe', axis=0))[1].mean()
+    (df2.set_index("name").T.filter(like='joe', axis=0))[1].add(10).mean().round().to_html()
 
     df2.set_index("name").filter(like='joe', axis=0).mean().head()
 
@@ -35,7 +35,7 @@ def compliant(df: pd.DataFrame, my_function, something, df2: DataFrame):
 
     df.set_index("name").filter(like='joe', axis=0).groupby("team")["salary"].mean()
 
-    DataFrame().set_index("name").pipe(my_function).filter(like='joe', axis=0).groupby("team")["salary"].mean()
+    DataFrame().set_index("name").pipe(my_function).filter(like='joe', axis=0).groupby("team")["salary"].add(10).round().mean().to_json()
 
-    something.set_index("name").filter(like='joe', axis=0).groupby("team")["salary"].mean().head()
+    something.set_index("name").filter(like='joe', axis=0).groupby("team")["salary"].add(10).round().mean().to_parquet()