From fbaf6df228b94c374f74901af5e4ea7d532b2b72 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 27 Jan 2025 16:26:22 -0500 Subject: [PATCH 1/5] perf: improve to_file_path() --- url/benches/parse_url.rs | 14 ++++++++++++++ url/src/lib.rs | 31 ++++++++++++++++++------------- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/url/benches/parse_url.rs b/url/benches/parse_url.rs index 531c2e99..19cfdd92 100644 --- a/url/benches/parse_url.rs +++ b/url/benches/parse_url.rs @@ -82,6 +82,19 @@ fn punycode_rtl(bench: &mut Bencher) { bench.iter(|| black_box(url).parse::().unwrap()); } +fn url_to_file_path(bench: &mut Bencher) { + let url = if cfg!(windows) { + "file:///C:/dir/next_dir/sub_sub_dir/testing/testing.json" + } else { + "file:///data/dir/next_dir/sub_sub_dir/testing/testing.json" + }; + let url = url.parse::().unwrap(); + + bench.iter(|| { + black_box(url.to_file_path().unwrap()); + }); +} + benchmark_group!( benches, short, @@ -95,5 +108,6 @@ benchmark_group!( punycode_ltr, unicode_rtl, punycode_rtl, + url_to_file_path ); benchmark_main!(benches); diff --git a/url/src/lib.rs b/url/src/lib.rs index e015acce..c0f2992c 100644 --- a/url/src/lib.rs +++ b/url/src/lib.rs @@ -2722,7 +2722,7 @@ impl Url { _ => return Err(()), }; - return file_url_segments_to_pathbuf(host, segments); + return file_url_segments_to_pathbuf(self.as_str().len(), host, segments); } Err(()) } @@ -3032,6 +3032,7 @@ fn path_to_file_url_segments_windows( any(unix, target_os = "redox", target_os = "wasi", target_os = "hermit") ))] fn file_url_segments_to_pathbuf( + estimated_capacity: usize, host: Option<&str>, segments: str::Split<'_, char>, ) -> Result { @@ -3049,11 +3050,10 @@ fn file_url_segments_to_pathbuf( return Err(()); } - let mut bytes = if cfg!(target_os = "redox") { - b"file:".to_vec() - } else { - Vec::new() - }; + let mut bytes = Vec::with_capacity(estimated_capacity); + if cfg!(target_os = "redox") { + bytes.extend(b"file:"); + } for segment in segments { bytes.push(b'/'); @@ -3085,22 +3085,26 @@ fn file_url_segments_to_pathbuf( #[cfg(all(feature = "std", windows))] fn file_url_segments_to_pathbuf( + estimated_capacity: usize, host: Option<&str>, segments: str::Split, ) -> Result { - file_url_segments_to_pathbuf_windows(host, segments) + file_url_segments_to_pathbuf_windows(estimated_capacity, host, segments) } // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102 #[cfg(feature = "std")] #[cfg_attr(not(windows), allow(dead_code))] fn file_url_segments_to_pathbuf_windows( + estimated_capacity: usize, host: Option<&str>, mut segments: str::Split<'_, char>, ) -> Result { - use percent_encoding::percent_decode; - let mut string = if let Some(host) = host { - r"\\".to_owned() + host + use percent_encoding::percent_decode_str; + let mut string = String::with_capacity(estimated_capacity); + if let Some(host) = host { + string.push('\\'); + string.push_str(host); } else { let first = segments.next().ok_or(())?; @@ -3110,7 +3114,7 @@ fn file_url_segments_to_pathbuf_windows( return Err(()); } - first.to_owned() + string.push_str(first); } 4 => { @@ -3122,7 +3126,8 @@ fn file_url_segments_to_pathbuf_windows( return Err(()); } - first[0..1].to_owned() + ":" + string.push_str(&first[0..1]); + string.push(':'); } _ => return Err(()), @@ -3133,7 +3138,7 @@ fn file_url_segments_to_pathbuf_windows( string.push('\\'); // Currently non-unicode windows paths cannot be represented - match String::from_utf8(percent_decode(segment.as_bytes()).collect()) { + match percent_decode_str(segment).decode_utf8() { Ok(s) => string.push_str(&s), Err(..) => return Err(()), } From 93682a679d108589706269975687130efdda6013 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 27 Jan 2025 16:28:31 -0500 Subject: [PATCH 2/5] fix --- url/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/url/src/lib.rs b/url/src/lib.rs index c0f2992c..f7e75e2b 100644 --- a/url/src/lib.rs +++ b/url/src/lib.rs @@ -3103,7 +3103,7 @@ fn file_url_segments_to_pathbuf_windows( use percent_encoding::percent_decode_str; let mut string = String::with_capacity(estimated_capacity); if let Some(host) = host { - string.push('\\'); + string.push_str(r"\\"); string.push_str(host); } else { let first = segments.next().ok_or(())?; From 3e1aeb4d9d6692b10c6d2f27f8007ebc822124fd Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 6 Feb 2025 13:56:54 -0500 Subject: [PATCH 3/5] make estimate more exact --- url/src/lib.rs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/url/src/lib.rs b/url/src/lib.rs index 6af51a2e..8586c407 100644 --- a/url/src/lib.rs +++ b/url/src/lib.rs @@ -2720,7 +2720,18 @@ impl Url { _ => return Err(()), }; - return file_url_segments_to_pathbuf(self.as_str().len(), host, segments); + let estimated_capacity = self.as_str().len() + - if cfg!(target_os = "redox") { + // remove only // because it still has file: + 2 + } else if cfg!(windows) { + // remove file: - has posssible \\ for hostname + 5 + } else { + // remove file:// + 7 + }; + return file_url_segments_to_pathbuf(estimated_capacity, host, segments); } Err(()) } @@ -3141,6 +3152,13 @@ fn file_url_segments_to_pathbuf_windows( Err(..) => return Err(()), } } + // ensure our estimated capacity was good + debug_assert!( + string.len() <= estimated_capacity, + "len: {}, capacity: {}", + string.len(), + estimated_capacity + ); let path = PathBuf::from(string); debug_assert!( path.is_absolute(), From 51bc795e79189808d878f62632d4926ea35bd0a2 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 6 Feb 2025 17:00:20 -0500 Subject: [PATCH 4/5] use saturating_sub --- url/src/lib.rs | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/url/src/lib.rs b/url/src/lib.rs index 8586c407..adb28f5a 100644 --- a/url/src/lib.rs +++ b/url/src/lib.rs @@ -2720,17 +2720,25 @@ impl Url { _ => return Err(()), }; - let estimated_capacity = self.as_str().len() - - if cfg!(target_os = "redox") { - // remove only // because it still has file: - 2 - } else if cfg!(windows) { - // remove file: - has posssible \\ for hostname - 5 + let str_len = self.as_str().len(); + let estimated_capacity = if cfg!(target_os = "redox") { + let scheme_len = self.scheme().len(); + let file_scheme_len = "file".len(); + // remove only // because it still has file: + if scheme_len < file_scheme_len { + let scheme_diff = file_scheme_len - scheme_len; + (str_len + scheme_diff).saturating_sub(2) } else { - // remove file:// - 7 - }; + let scheme_diff = scheme_len - file_scheme_len; + str_len.saturating_sub(scheme_diff + 2) + } + } else if cfg!(windows) { + // remove scheme: - has posssible \\ for hostname + str_len.saturating_sub(self.scheme().len() + 1) + } else { + // remove scheme:// + str_len.saturating_sub(self.scheme().len() + 3) + }; return file_url_segments_to_pathbuf(estimated_capacity, host, segments); } Err(()) @@ -3110,7 +3118,8 @@ fn file_url_segments_to_pathbuf_windows( mut segments: str::Split<'_, char>, ) -> Result { use percent_encoding::percent_decode_str; - let mut string = String::with_capacity(estimated_capacity); + let mut string = String::new(); + string.try_reserve(estimated_capacity).map_err(|_| ())?; if let Some(host) = host { string.push_str(r"\\"); string.push_str(host); @@ -3153,12 +3162,14 @@ fn file_url_segments_to_pathbuf_windows( } } // ensure our estimated capacity was good - debug_assert!( - string.len() <= estimated_capacity, - "len: {}, capacity: {}", - string.len(), - estimated_capacity - ); + if cfg!(test) { + debug_assert!( + string.len() <= estimated_capacity, + "len: {}, capacity: {}", + string.len(), + estimated_capacity + ); + } let path = PathBuf::from(string); debug_assert!( path.is_absolute(), From 22873d0588dfdd7db2529465904d892704daf36d Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 6 Feb 2025 17:22:00 -0500 Subject: [PATCH 5/5] better to try_reserve --- url/src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/url/src/lib.rs b/url/src/lib.rs index adb28f5a..b8bc668a 100644 --- a/url/src/lib.rs +++ b/url/src/lib.rs @@ -3067,7 +3067,8 @@ fn file_url_segments_to_pathbuf( return Err(()); } - let mut bytes = Vec::with_capacity(estimated_capacity); + let mut bytes = Vec::new(); + bytes.try_reserve(estimated_capacity).map_err(|_| ())?; if cfg!(target_os = "redox") { bytes.extend(b"file:"); }