Skip to content

Commit

Permalink
feat: make sure to quote formulas on Excel export (apache#31166)
Browse files Browse the repository at this point in the history
  • Loading branch information
betodealmeida authored Nov 26, 2024
1 parent 529aed5 commit 45668e3
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 0 deletions.
21 changes: 21 additions & 0 deletions superset/utils/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,30 @@
from superset.utils.core import GenericDataType


def quote_formulas(df: pd.DataFrame) -> pd.DataFrame:
    """
    Make sure to quote any formulas for security reasons.

    String cells starting with ``=``, ``+``, ``-`` or ``@`` are prefixed with a
    single quote. Only object-dtype columns are inspected; non-string values
    and empty strings are left untouched.

    The input DataFrame is not modified: the quoting is applied to a copy, so
    exporting never alters the data the caller still holds.

    :param df: DataFrame about to be exported
    :returns: a new DataFrame with formula-like strings quoted
    """
    formula_prefixes = ("=", "+", "-", "@")

    def quote(value: Any) -> Any:
        # ``startswith`` with a tuple is False for the empty string, so no
        # separate length check is needed.
        if isinstance(value, str) and value.startswith(formula_prefixes):
            return f"'{value}"
        return value

    # Work on a copy; assigning columns on ``df`` itself would leak the quoted
    # values back into the caller's DataFrame.
    quoted = df.copy()
    for col in quoted.select_dtypes(include="object").columns:
        quoted[col] = quoted[col].apply(quote)

    return quoted


def df_to_excel(df: pd.DataFrame, **kwargs: Any) -> Any:
output = io.BytesIO()

# make sure formulas are quoted, to prevent malicious injections
df = quote_formulas(df)

# pylint: disable=abstract-class-instantiated
with pd.ExcelWriter(output, engine="xlsxwriter") as writer:
df.to_excel(writer, **kwargs)
Expand Down
13 changes: 13 additions & 0 deletions tests/unit_tests/utils/excel_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@ def test_timezone_conversion() -> None:
assert pd.read_excel(contents)["dt"][0] == "2023-01-01 00:00:00+00:00"


def test_quote_formulas() -> None:
    """
    Check that formula-like strings come back quoted after an Excel export.
    """
    frame = pd.DataFrame({"formula": ["=SUM(A1:A2)", "normal", "@SUM(A1:A2)"]})
    expected = ["'=SUM(A1:A2)", "normal", "'@SUM(A1:A2)"]

    # Round-trip through the exporter and read the workbook back.
    exported = pd.read_excel(df_to_excel(frame))

    assert exported["formula"].tolist() == expected


def test_column_data_types_with_one_numeric_column():
df = pd.DataFrame(
{
Expand Down

0 comments on commit 45668e3

Please sign in to comment.