diff --git a/crates/ruff/tests/snapshots/show_settings__display_default_settings.snap b/crates/ruff/tests/snapshots/show_settings__display_default_settings.snap index dc4fad00a75d7b..46cd4b0dddbae1 100644 --- a/crates/ruff/tests/snapshots/show_settings__display_default_settings.snap +++ b/crates/ruff/tests/snapshots/show_settings__display_default_settings.snap @@ -375,6 +375,7 @@ linter.pylint.max_locals = 15 linter.pyupgrade.keep_runtime_typing = false linter.ruff.parenthesize_tuple_in_subscript = false linter.ruff.extend_markup_names = [] +linter.ruff.allowed_markup_calls = [] # Formatter Settings formatter.exclude = [] diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/RUF035_whitelisted_markup_calls.py b/crates/ruff_linter/resources/test/fixtures/ruff/RUF035_whitelisted_markup_calls.py new file mode 100644 index 00000000000000..424306a2e7726d --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/ruff/RUF035_whitelisted_markup_calls.py @@ -0,0 +1,9 @@ +from bleach import clean +from markupsafe import Markup + +content = "" +Markup(clean(content)) + +# indirect assignments are currently not supported +cleaned = clean(content) +Markup(cleaned) # RUF035 diff --git a/crates/ruff_linter/src/rules/ruff/mod.rs b/crates/ruff_linter/src/rules/ruff/mod.rs index d883f5a0d2bc9f..241877180bd7e2 100644 --- a/crates/ruff_linter/src/rules/ruff/mod.rs +++ b/crates/ruff_linter/src/rules/ruff/mod.rs @@ -91,6 +91,7 @@ mod tests { ruff: super::settings::Settings { parenthesize_tuple_in_subscript: true, extend_markup_names: vec![], + allowed_markup_calls: vec![], }, ..LinterSettings::for_rule(Rule::IncorrectlyParenthesizedTupleInSubscript) }, @@ -107,6 +108,7 @@ mod tests { ruff: super::settings::Settings { parenthesize_tuple_in_subscript: false, extend_markup_names: vec![], + allowed_markup_calls: vec![], }, target_version: PythonVersion::Py310, ..LinterSettings::for_rule(Rule::IncorrectlyParenthesizedTupleInSubscript) @@ -447,6 +449,30 @@ mod tests { ruff: super::settings::Settings { parenthesize_tuple_in_subscript: true, extend_markup_names: vec!["webhelpers.html.literal".to_string()], + allowed_markup_calls: vec![], + }, + preview: PreviewMode::Enabled, + ..LinterSettings::for_rule(rule_code) + }, + )?; + assert_messages!(snapshot, diagnostics); + Ok(()) + } + + #[test_case(Rule::UnsafeMarkupUse, Path::new("RUF035_whitelisted_markup_calls.py"))] + fn whitelisted_markup_calls(rule_code: Rule, path: &Path) -> Result<()> { + let snapshot = format!( + "whitelisted_markup_calls__{}_{}", + rule_code.noqa_code(), + path.to_string_lossy() + ); + let diagnostics = test_path( + Path::new("ruff").join(path).as_path(), + &LinterSettings { + ruff: super::settings::Settings { + parenthesize_tuple_in_subscript: true, + extend_markup_names: vec![], + allowed_markup_calls: vec!["bleach.clean".to_string()], }, preview: PreviewMode::Enabled, ..LinterSettings::for_rule(rule_code) diff --git a/crates/ruff_linter/src/rules/ruff/rules/unsafe_markup_use.rs b/crates/ruff_linter/src/rules/ruff/rules/unsafe_markup_use.rs index 762c38d738740f..cb12e40b164800 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/unsafe_markup_use.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/unsafe_markup_use.rs @@ -1,9 +1,9 @@ -use ruff_python_ast::ExprCall; +use ruff_python_ast::{Expr, ExprCall}; use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, ViolationMetadata}; use ruff_python_ast::name::QualifiedName; -use ruff_python_semantic::Modules; +use ruff_python_semantic::{Modules, SemanticModel}; use ruff_text_size::Ranged; use crate::{checkers::ast::Checker, settings::LinterSettings}; @@ -22,7 +22,9 @@ use crate::{checkers::ast::Checker, settings::LinterSettings}; /// treated like [`markupsafe.Markup`]. /// /// This rule was originally inspired by [flake8-markupsafe] but doesn't carve -/// out any exceptions for i18n related calls. +/// out any exceptions for i18n related calls by default. +/// +/// You can use [`lint.ruff.allowed-markup-calls`] to specify exceptions. /// /// ## Example /// Given: @@ -64,6 +66,7 @@ use crate::{checkers::ast::Checker, settings::LinterSettings}; /// ``` /// ## Options /// - `lint.ruff.extend-markup-names` +/// - `lint.ruff.allowed-markup-calls` /// /// ## References /// - [MarkupSafe](https://pypi.org/project/MarkupSafe/) @@ -95,7 +98,7 @@ pub(crate) fn unsafe_markup_call(checker: &mut Checker, call: &ExprCall) { return; } - if !is_unsafe_call(call) { + if !is_unsafe_call(call, checker.semantic(), checker.settings) { return; } @@ -127,12 +130,29 @@ fn is_markup_call(qualified_name: &QualifiedName, settings: &LinterSettings) -> .any(|target| *qualified_name == target) } -fn is_unsafe_call(call: &ExprCall) -> bool { +fn is_unsafe_call(call: &ExprCall, semantic: &SemanticModel, settings: &LinterSettings) -> bool { // technically this could be circumvented by using a keyword argument // but without type-inference we can't really know which keyword argument // corresponds to the first positional argument and either way it is // unlikely that someone will actually use a keyword argument here // TODO: Eventually we may want to allow dynamic values, as long as they // have a __html__ attribute, since that is part of the API - matches!(&*call.arguments.args, [first] if !first.is_string_literal_expr() && !first.is_bytes_literal_expr()) + matches!(&*call.arguments.args, [first] if !first.is_string_literal_expr() && !first.is_bytes_literal_expr() && !is_whitelisted_call(first, semantic, settings)) +} + +fn is_whitelisted_call(expr: &Expr, semantic: &SemanticModel, settings: &LinterSettings) -> bool { + let Expr::Call(ExprCall { func, .. }) = expr else { + return false; + }; + + let Some(qualified_name) = semantic.resolve_qualified_name(func) else { + return false; + }; + + settings + .ruff + .allowed_markup_calls + .iter() + .map(|target| QualifiedName::from_dotted_name(target)) + .any(|target| qualified_name == target) } diff --git a/crates/ruff_linter/src/rules/ruff/settings.rs b/crates/ruff_linter/src/rules/ruff/settings.rs index 3907fc063f0924..19c8e66596b2d7 100644 --- a/crates/ruff_linter/src/rules/ruff/settings.rs +++ b/crates/ruff_linter/src/rules/ruff/settings.rs @@ -8,6 +8,7 @@ use std::fmt; pub struct Settings { pub parenthesize_tuple_in_subscript: bool, pub extend_markup_names: Vec, + pub allowed_markup_calls: Vec, } impl fmt::Display for Settings { @@ -18,6 +19,7 @@ impl fmt::Display for Settings { fields = [ self.parenthesize_tuple_in_subscript, self.extend_markup_names | array, + self.allowed_markup_calls | array, ] } Ok(()) diff --git a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__whitelisted_markup_calls__RUF035_RUF035_whitelisted_markup_calls.py.snap b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__whitelisted_markup_calls__RUF035_RUF035_whitelisted_markup_calls.py.snap new file mode 100644 index 00000000000000..e3f18921f3870e --- /dev/null +++ b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__whitelisted_markup_calls__RUF035_RUF035_whitelisted_markup_calls.py.snap @@ -0,0 +1,10 @@ +--- +source: crates/ruff_linter/src/rules/ruff/mod.rs +--- +RUF035_whitelisted_markup_calls.py:9:1: RUF035 Unsafe use of `markupsafe.Markup` detected + | +7 | # indirect assignments are currently not supported +8 | cleaned = clean(content) +9 | Markup(cleaned) # RUF035 + | ^^^^^^^^^^^^^^^ RUF035 + | diff --git a/crates/ruff_workspace/src/options.rs b/crates/ruff_workspace/src/options.rs index 0d9fc739c557cf..57c0f2dbf47e32 100644 --- a/crates/ruff_workspace/src/options.rs +++ b/crates/ruff_workspace/src/options.rs @@ -3079,18 +3079,49 @@ pub struct RuffOptions { )] pub parenthesize_tuple_in_subscript: Option, - /// A list of additional callable names that behave like [`markupsafe.Markup`]. + /// A list of additional callable names that behave like + /// [`markupsafe.Markup`](https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup). /// /// Expects to receive a list of fully-qualified names (e.g., `webhelpers.html.literal`, rather than /// `literal`). - /// - /// [markupsafe.Markup]: https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup #[option( default = "[]", value_type = "list[str]", example = "extend-markup-names = [\"webhelpers.html.literal\", \"my_package.Markup\"]" )] pub extend_markup_names: Option>, + + /// A list of callable names, whose result may be safely passed into + /// [`markupsafe.Markup`](https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup). + /// + /// Expects to receive a list of fully-qualified names (e.g., `bleach.clean`, rather than `clean`). + /// + /// This setting helps you avoid false positives in code like: + /// + /// ```python + /// from bleach import clean + /// from markupsafe import Markup + /// + /// cleaned_markup = Markup(clean(some_user_input)) + /// ``` + /// + /// Where the use of [`bleach.clean`](https://bleach.readthedocs.io/en/latest/clean.html) + /// usually ensures that there's no XSS vulnerability. + /// + /// Although it is not recommended, you may also use this setting to whitelist other + /// kinds of calls, e.g. calls to i18n translation functions, where how safe that is + /// will depend on the implementation and how well the translations are audited. + /// + /// Another common use-case is to wrap the output of functions that generate markup + /// like [`xml.etree.ElementTree.tostring`](https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.tostring) + /// or template rendering engines where sanitization of potential user input is either + /// already baked in or has to happen before rendering. + #[option( + default = "[]", + value_type = "list[str]", + example = "allowed-markup-calls = [\"bleach.clean\", \"my_package.sanitize\"]" + )] + pub allowed_markup_calls: Option>, } impl RuffOptions { @@ -3100,6 +3131,7 @@ impl RuffOptions { .parenthesize_tuple_in_subscript .unwrap_or_default(), extend_markup_names: self.extend_markup_names.unwrap_or_default(), + allowed_markup_calls: self.allowed_markup_calls.unwrap_or_default(), } } } diff --git a/ruff.schema.json b/ruff.schema.json index a55f658c9741eb..c23e2a15ed04e2 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -2758,8 +2758,18 @@ "RuffOptions": { "type": "object", "properties": { + "allowed-markup-calls": { + "description": "A list of callable names, whose result may be safely passed into [`markupsafe.Markup`](https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup).\n\nExpects to receive a list of fully-qualified names (e.g., `bleach.clean`, rather than `clean`).\n\nThis setting helps you avoid false positives in code like:\n\n```python from bleach import clean from markupsafe import Markup\n\ncleaned_markup = Markup(clean(some_user_input)) ```\n\nWhere the use of [`bleach.clean`](https://bleach.readthedocs.io/en/latest/clean.html) usually ensures that there's no XSS vulnerability.\n\nAlthough it is not recommended, you may also use this setting to whitelist other kinds of calls, e.g. calls to i18n translation functions, where how safe that is will depend on the implementation and how well the translations are audited.\n\nAnother common use-case is to wrap the output of functions that generate markup like [`xml.etree.ElementTree.tostring`](https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.tostring) or template rendering engines where sanitization of potential user input is either already baked in or has to happen before rendering.", + "type": [ + "array", + "null" + ], + "items": { + "type": "string" + } + }, "extend-markup-names": { - "description": "A list of additional callable names that behave like [`markupsafe.Markup`].\n\nExpects to receive a list of fully-qualified names (e.g., `webhelpers.html.literal`, rather than `literal`).\n\n[markupsafe.Markup]: https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup", + "description": "A list of additional callable names that behave like [`markupsafe.Markup`](https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup).\n\nExpects to receive a list of fully-qualified names (e.g., `webhelpers.html.literal`, rather than `literal`).", "type": [ "array", "null"