From 45668e31fcba0324aa2ee429e89617473e4b9544 Mon Sep 17 00:00:00 2001
From: Beto Dealmeida
Date: Tue, 26 Nov 2024 18:16:44 -0500
Subject: [PATCH] feat: make sure to quote formulas on Excel export (#31166)

---
Reviewer note: quote_formulas() was reworked relative to the original commit
(no in-place mutation of the caller's frame, duplicated column labels are
handled, pandas "string" dtype columns are inspected). The post-image blob
hash recorded for superset/utils/excel.py below is therefore stale.

 superset/utils/excel.py               | 27 +++++++++++++++++++++++++++
 tests/unit_tests/utils/excel_tests.py | 13 +++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/superset/utils/excel.py b/superset/utils/excel.py
index 8609be5b43e6b..602549975f113 100644
--- a/superset/utils/excel.py
+++ b/superset/utils/excel.py
@@ -22,9 +22,36 @@
 from superset.utils.core import GenericDataType
 
 
+def quote_formulas(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Make sure to quote any formulas for security reasons.
+
+    Returns a shallow copy in which string cells starting with ``=``, ``+``,
+    ``-`` or ``@`` are prefixed with a single quote; ``df`` is left untouched.
+    """
+    formula_prefixes = ("=", "+", "-", "@")
+
+    def quote(value: Any) -> Any:
+        if isinstance(value, str) and value.startswith(formula_prefixes):
+            return f"'{value}"
+        return value
+
+    quoted = df.copy(deep=False)
+    # go by position so that duplicated column labels are handled as well
+    for position, dtype in enumerate(df.dtypes):
+        if pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype):
+            # ``isetitem`` swaps in a new column, never writing into shared data
+            quoted.isetitem(position, df.iloc[:, position].map(quote))
+
+    return quoted
+
+
 def df_to_excel(df: pd.DataFrame, **kwargs: Any) -> Any:
     output = io.BytesIO()
 
+    # make sure formulas are quoted, to prevent malicious injections
+    df = quote_formulas(df)
+
     # pylint: disable=abstract-class-instantiated
     with pd.ExcelWriter(output, engine="xlsxwriter") as writer:
         df.to_excel(writer, **kwargs)
diff --git a/tests/unit_tests/utils/excel_tests.py b/tests/unit_tests/utils/excel_tests.py
index 745beff5052af..deb6d3d0b4eaf 100644
--- a/tests/unit_tests/utils/excel_tests.py
+++ b/tests/unit_tests/utils/excel_tests.py
@@ -34,6 +34,19 @@ def test_timezone_conversion() -> None:
     assert pd.read_excel(contents)["dt"][0] == "2023-01-01 00:00:00+00:00"
 
 
+def test_quote_formulas() -> None:
+    """
+    Test that formulas are quoted in Excel.
+    """
+    df = pd.DataFrame({"formula": ["=SUM(A1:A2)", "normal", "@SUM(A1:A2)"]})
+    contents = df_to_excel(df)
+    assert pd.read_excel(contents)["formula"].tolist() == [
+        "'=SUM(A1:A2)",
+        "normal",
+        "'@SUM(A1:A2)",
+    ]
+
+
 def test_column_data_types_with_one_numeric_column():
     df = pd.DataFrame(
         {