From b29355f44de3e1691727d3209cf0e3a07f306c16 Mon Sep 17 00:00:00 2001 From: David Kunzmann Date: Mon, 9 Oct 2023 15:31:47 +0200 Subject: [PATCH] SONARPY-1514: Rule S6742 should raise issues on chains of 7 or more operations. (#1600) --- .../checks/PandasChainInstructionCheck.java | 2 +- .../checks/pandasChainInstructionCheck.py | 30 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/python-checks/src/main/java/org/sonar/python/checks/PandasChainInstructionCheck.java b/python-checks/src/main/java/org/sonar/python/checks/PandasChainInstructionCheck.java index 5224460197..be1961e25e 100644 --- a/python-checks/src/main/java/org/sonar/python/checks/PandasChainInstructionCheck.java +++ b/python-checks/src/main/java/org/sonar/python/checks/PandasChainInstructionCheck.java @@ -43,7 +43,7 @@ public class PandasChainInstructionCheck extends PythonSubscriptionCheck { private static final String MESSAGE = "Refactor this long chain of instructions with pandas.pipe"; - private static final int MAX_CHAIN_LENGTH = 5; + private static final int MAX_CHAIN_LENGTH = 7; private static final String DATAFRAME_FQN = "pandas.core.frame.DataFrame"; diff --git a/python-checks/src/test/resources/checks/pandasChainInstructionCheck.py b/python-checks/src/test/resources/checks/pandasChainInstructionCheck.py index d516f49077..452e76478a 100644 --- a/python-checks/src/test/resources/checks/pandasChainInstructionCheck.py +++ b/python-checks/src/test/resources/checks/pandasChainInstructionCheck.py @@ -4,30 +4,30 @@ def non_compliant(df: pd.DataFrame, df2: DataFrame): - df2.set_index("name").T.filter(like='joe', axis=0)[1].mean().head() # Noncompliant -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - DataFrame().set_index("name").filter(like='joe', axis=0).groupby("team")["salary"].mean().head() # Noncompliant {{Refactor this long chain of instructions with pandas.pipe}} -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - df.set_index("name").filter(like='joe', axis=0).groupby("team")["salary"].mean().head() # FN see SONARPY-1503 + df2.set_index("name").T.filter(like='joe', axis=0)[1].add(10).mean().round().to_parquet() # Noncompliant +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + DataFrame().set_index("name").filter(like='joe', axis=0).groupby("team")["salary"].add(10).mean().round().to_parquet() # Noncompliant {{Refactor this long chain of instructions with pandas.pipe}} +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + df.set_index("name").filter(like='joe', axis=0).groupby("team")["salary"].add(10).mean().round().to_parquet() # FN see SONARPY-1503 - df2.set_index("name").filter(like='joe', axis=0).groupby("team")["salary"]["test"].mean().head() # Noncompliant {{Refactor this long chain of instructions with pandas.pipe}} -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - pd.read_csv("some_csv.csv").filter(like='joe', axis=0).groupby("team")["salary"]["test"].mean().head() # Noncompliant -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + df2.set_index("name").filter(like='joe', axis=0).groupby("team")["salary"]["test"].add(10).mean().round().to_parquet() # Noncompliant {{Refactor this long chain of instructions with pandas.pipe}} +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + pd.read_csv("some_csv.csv").filter(like='joe', axis=0).groupby("team")["salary"]["test"].add(10).mean().round().to_parquet() # Noncompliant +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # Here we do not raise an issue only because we do not support subscription with Name # If support is added for such case we would encounter FPs when the subscription with Name is at the beginning of the chain - pd.read_csv("some_csv.csv").filter(like='joe', axis=0).groupby("team")["salary"]["test"].axes[1].unique() # FN + pd.read_csv("some_csv.csv").filter(like='joe', axis=0).add(10).groupby("team")["salary"]["test"].axes[1].unique().to_json() # FN # Here we should not raise an issue as the chain is done mainly on an Index object which does not have a pipe method - pd.read_csv("some_csv.csv").axes[1].join(pd.Index([4, 5, 6])).repeat([1,2]).drop_duplicates().insert(1, 42) + pd.read_csv("some_csv.csv").axes[1].join(pd.Index([4, 5, 6])).T.repeat([1,2]).drop_duplicates().insert(1, 42).sort_values() def compliant(df: pd.DataFrame, my_function, something, df2: DataFrame): - df2.set_index("name").T.filter(like='joe', axis=0)[1].mean() + df2.set_index("name").T.filter(like='joe', axis=0)[1].add(10).mean().to_html() - (df2.set_index("name").T.filter(like='joe', axis=0))[1].mean() + (df2.set_index("name").T.filter(like='joe', axis=0))[1].add(10).mean().round().to_html() df2.set_index("name").filter(like='joe', axis=0).mean().head() @@ -35,7 +35,7 @@ def compliant(df: pd.DataFrame, my_function, something, df2: DataFrame): df.set_index("name").filter(like='joe', axis=0).groupby("team")["salary"].mean() - DataFrame().set_index("name").pipe(my_function).filter(like='joe', axis=0).groupby("team")["salary"].mean() + DataFrame().set_index("name").pipe(my_function).filter(like='joe', axis=0).groupby("team")["salary"].add(10).round().mean().to_json() - something.set_index("name").filter(like='joe', axis=0).groupby("team")["salary"].mean().head() + something.set_index("name").filter(like='joe', axis=0).groupby("team")["salary"].add(10).round().mean().to_parquet()