From fbe8dab625fe0517578a3d9fe36a10b0278a295e Mon Sep 17 00:00:00 2001 From: tuziben Date: Wed, 20 Dec 2023 16:17:12 +0800 Subject: [PATCH] url decode the search index pattern (#4292) * url decode the search index pattern * use percent_decode crate * tiny fix * refine error message * remove duplicated decode * code format * Remove unwrap in extract_index_id_patterns --------- Co-authored-by: fmassot --- quickwit/Cargo.lock | 1 + quickwit/Cargo.toml | 1 + quickwit/quickwit-serve/Cargo.toml | 1 + .../src/search_api/rest_handler.rs | 30 +++++++++---------- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index f9ee3b2324b..b79a0838f81 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -5965,6 +5965,7 @@ dependencies = [ "num_cpus", "once_cell", "opentelemetry", + "percent-encoding", "quickwit-actors", "quickwit-cluster", "quickwit-common", diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index 5e0190d5a7e..95dab9e19c2 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -123,6 +123,7 @@ openssl-probe = "0.1.5" opentelemetry = { version = "0.19", features = ["rt-tokio"] } opentelemetry-otlp = "0.12.0" ouroboros = "0.18.0" +percent-encoding = "2.3.1" pin-project = "1.1.0" pnet = { version = "0.33.0", features = ["std"] } postcard = { version = "1.0.4", features = [ diff --git a/quickwit/quickwit-serve/Cargo.toml b/quickwit/quickwit-serve/Cargo.toml index a6c53953d1a..4fe411e96de 100644 --- a/quickwit/quickwit-serve/Cargo.toml +++ b/quickwit/quickwit-serve/Cargo.toml @@ -24,6 +24,7 @@ itertools = { workspace = true } mime_guess = { workspace = true } num_cpus = { workspace = true } once_cell = { workspace = true } +percent-encoding = { workspace = true } regex = { workspace = true } rust-embed = { workspace = true } serde = { workspace = true } diff --git a/quickwit/quickwit-serve/src/search_api/rest_handler.rs b/quickwit/quickwit-serve/src/search_api/rest_handler.rs index 92d9df1a91d..b073e30b21e 100644 --- a/quickwit/quickwit-serve/src/search_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/search_api/rest_handler.rs @@ -20,16 +20,16 @@ use std::convert::TryFrom; use std::sync::Arc; +use anyhow::Context; use futures::stream::StreamExt; use hyper::header::HeaderValue; use hyper::HeaderMap; -use once_cell::sync::Lazy; +use percent_encoding::percent_decode_str; use quickwit_config::validate_index_id_pattern; use quickwit_proto::search::{CountHits, OutputFormat, SortField, SortOrder}; use quickwit_proto::ServiceError; use quickwit_query::query_ast::query_ast_from_user_text; use quickwit_search::{SearchError, SearchResponseRest, SearchService}; -use regex::Regex; use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; use serde_json::Value as JsonValue; use tracing::info; @@ -56,20 +56,18 @@ use crate::{with_arg, BodyFormat}; )] pub struct SearchApi; -// Matches index patterns separated by commas or its URL encoded version '%2C'. -static COMMA_SEPARATED_INDEX_PATTERNS_REGEX: Lazy = - Lazy::new(|| Regex::new(r",|%2C").expect("the regular expression should compile")); - pub(crate) async fn extract_index_id_patterns( comma_separated_index_patterns: String, ) -> Result, Rejection> { + let index_pattern = percent_decode_str(&comma_separated_index_patterns) + .decode_utf8() + .context("index pattern does not contain valid utf8 characters") + .map_err(|error| crate::rest::InvalidArgument(error.to_string()))?; + let mut index_ids_patterns = Vec::new(); - for index_id_pattern in - COMMA_SEPARATED_INDEX_PATTERNS_REGEX.split(&comma_separated_index_patterns) - { - validate_index_id_pattern(index_id_pattern).map_err(|error| { - warp::reject::custom(crate::rest::InvalidArgument(error.to_string())) - })?; + for index_id_pattern in index_pattern.split(',').collect::>() { + validate_index_id_pattern(index_id_pattern) + .map_err(|error| crate::rest::InvalidArgument(error.to_string()))?; index_ids_patterns.push(index_id_pattern.to_string()); } assert!(!index_ids_patterns.is_empty()); @@ -538,16 +536,16 @@ mod tests { .await .unwrap(); assert_eq!( - extract_index_id_patterns("my-index-1,my-index-2".to_string()) + extract_index_id_patterns("my-index-1,my-index-2%2A".to_string()) .await .unwrap(), - vec!["my-index-1".to_string(), "my-index-2".to_string()] + vec!["my-index-1".to_string(), "my-index-2*".to_string()] ); assert_eq!( - extract_index_id_patterns("my-index-1%2Cmy-index-2".to_string()) + extract_index_id_patterns("my-index-1%2Cmy-index-%2A".to_string()) .await .unwrap(), - vec!["my-index-1".to_string(), "my-index-2".to_string()] + vec!["my-index-1".to_string(), "my-index-*".to_string()] ); extract_index_id_patterns("".to_string()).await.unwrap_err(); extract_index_id_patterns(" ".to_string())