From 7499212028156b92ee0325319cafc33a45b2ab01 Mon Sep 17 00:00:00 2001 From: Lukas Fittl Date: Tue, 7 Jan 2025 12:09:47 -0800 Subject: [PATCH] PII Filtering: Detect bind parameters in CONTEXT as statement_parameter This can occur in certain cases, for example auto_explain output on some Postgres versions. We previously would not have detected this CONTEXT line, but are now correctly detecting it as containing statement_parameter log secrets. In passing, add a test case for the new "Query Parameters" field in auto_explain which gets redacted based on the statement_text filter that filters out the whole plan text (we normalize the individual fields in resulting query samples, but the log text for auto_explain is redacted altogether). --- logs/analyze.go | 7 ++++++- logs/replace_test.go | 10 ++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/logs/analyze.go b/logs/analyze.go index 685c75ad5..830307f7e 100644 --- a/logs/analyze.go +++ b/logs/analyze.go @@ -1153,6 +1153,11 @@ var otherContextPatterns = []match{ regexp: regexp.MustCompile(`^JSON data, line (\d+): (.+)`), secrets: []state.LogSecretKind{0, state.TableDataLogSecret}, }, + { + prefixes: []string{"portal \"", "unnamed portal "}, + regexp: regexp.MustCompile(`(?:(?:unnamed portal|portal \"(.+)\") with parameters: |, )\$\d+ = (?:(NULL)|'((?:[^']|'')*)')`), + secrets: []state.LogSecretKind{0, state.StatementParameterLogSecret, state.StatementParameterLogSecret}, + }, } var autoVacuumIndexRegexp = regexp.MustCompile(`index "(.+?)": pages: (\d+) in total, (\d+) newly deleted, (\d+) currently deleted, (\d+) reusable,?\s*`) @@ -2170,7 +2175,7 @@ func matchOtherContextLogLine(logLine state.LogLine) state.LogLine { return logLine } for _, match := range otherContextPatterns { - logLine, parts := matchLogLine(logLine, match) + logLine, parts := matchLogLineAll(logLine, match) if parts != nil { return logLine } diff --git a/logs/replace_test.go b/logs/replace_test.go index d69bac045..b761a4af3 100644 --- 
a/logs/replace_test.go +++ b/logs/replace_test.go @@ -29,11 +29,21 @@ var replaceTests = []replaceTestpair{ input: "2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:LOG: duration: 1242.570 ms statement: SELECT 1\n", output: "duration: 1242.570 ms statement: [redacted]\n", }, + { + filterLogSecret: "statement_text", + input: "2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:LOG: duration: 2007.111 ms plan:\n{\"Query Text\": \"SELECT pg_sleep($1)\", \"Query Parameters\": \"$1 = '2'\", \"Plan\": { } }\n", + output: "duration: 2007.111 ms plan:\n[redacted]\n", + }, { filterLogSecret: "statement_parameter", input: "2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:LOG: duration: 4079.697 ms execute : \nSELECT * FROM x WHERE y = $1 LIMIT $2\n2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:DETAIL: parameters: $1 = 'long string', $2 = '1', $3 = 'long string'\n", output: "duration: 4079.697 ms execute : \nSELECT * FROM x WHERE y = $1 LIMIT $2\nparameters: $1 = '[redacted]', $2 = '[redacted]', $3 = '[redacted]'\n", }, + { + filterLogSecret: "statement_parameter", + input: "2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:LOG: duration: 2007.111 ms plan:\n{\"Query Text\": \"SELECT * FROM x WHERE y = $1 LIMIT $2\", \"Plan\": { } }\n2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:CONTEXT: unnamed portal with parameters: $1 = 'long string', $2 = '1', $3 = 'long string'\n", + output: "duration: 2007.111 ms plan:\n{\"Query Text\": \"SELECT * FROM x WHERE y = $1 LIMIT $2\", \"Plan\": { } }\nunnamed portal with parameters: $1 = '[redacted]', $2 = '[redacted]', $3 = '[redacted]'\n", + }, { filterLogSecret: "none", input: "2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:ERROR: division by zero\n2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:ERROR: Unknown Data\n",