From 8b802bf6f07622e1a351aa291a102a4006b39ae5 Mon Sep 17 00:00:00 2001
From: Christian Schmidt
Date: Fri, 6 Oct 2023 16:53:00 +0200
Subject: [PATCH] Do more conservative URL normalization (#758)

---
Note (commentary placed after the "---" separator, not part of the commit message):
In HTTP::URI::NORMALIZER the path is now "/" when empty, otherwise
Addressable::URI.normalize_path(uri.path) passed through percent_encode
(previously uri.normalized_path); the query is now percent_encode(uri.query)
(previously uri.query). percent_encode only rewrites characters matched by
PERCENT_ENCODE, i.e. anything outside \x21-\x7E.

 .rubocop.yml                         |  1 +
 .rubocop/metrics.yml                 |  4 ++
 lib/http/uri.rb                      | 20 +++++-
 spec/lib/http/uri/normalizer_spec.rb | 95 ++++++++++++++++++++++++++++
 4 files changed, 118 insertions(+), 2 deletions(-)
 create mode 100644 .rubocop/metrics.yml
 create mode 100644 spec/lib/http/uri/normalizer_spec.rb

diff --git a/.rubocop.yml b/.rubocop.yml
index b531d30f..e2956f84 100644
--- a/.rubocop.yml
+++ b/.rubocop.yml
@@ -1,6 +1,7 @@
 inherit_from:
   - .rubocop_todo.yml
   - .rubocop/layout.yml
+  - .rubocop/metrics.yml
   - .rubocop/style.yml
 
 AllCops:
diff --git a/.rubocop/metrics.yml b/.rubocop/metrics.yml
new file mode 100644
index 00000000..944c40ae
--- /dev/null
+++ b/.rubocop/metrics.yml
@@ -0,0 +1,4 @@
+Metrics/BlockLength:
+  Exclude:
+    - 'spec/**/*.rb'
+    - '*.gemspec'
\ No newline at end of file
diff --git a/lib/http/uri.rb b/lib/http/uri.rb
index 9c8a54c3..9bc93adf 100644
--- a/lib/http/uri.rb
+++ b/lib/http/uri.rb
@@ -37,6 +37,9 @@ class URI
     # @private
     HTTPS_SCHEME = "https"
 
+    # @private
+    PERCENT_ENCODE = /[^\x21-\x7E]+/.freeze
+
     # @private
     NORMALIZER = lambda do |uri|
       uri = HTTP::URI.parse uri
@@ -44,8 +47,8 @@ class URI
       HTTP::URI.new(
         :scheme    => uri.normalized_scheme,
         :authority => uri.normalized_authority,
-        :path      => uri.normalized_path,
-        :query     => uri.query,
+        :path      => uri.path.empty? ? "/" : percent_encode(Addressable::URI.normalize_path(uri.path)),
+        :query     => percent_encode(uri.query),
         :fragment  => uri.normalized_fragment
       )
     end
@@ -71,6 +74,19 @@ def self.form_encode(form_values, sort = false)
       Addressable::URI.form_encode(form_values, sort)
     end
 
+    # Percent-encode every run of characters matched by PERCENT_ENCODE.
+    #
+    # @param [String, nil] string raw string; nil is passed through as nil
+    #
+    # @return [String, nil] string with matches replaced by %XX escapes of their UTF-8 bytes
+    #
+    # @private
+    def self.percent_encode(string)
+      string&.gsub(PERCENT_ENCODE) do |substr|
+        substr.encode(Encoding::UTF_8).bytes.map { |c| format("%%%02X", c) }.join
+      end
+    end
+
     # Creates an HTTP::URI instance from the given options
     #
     # @param [Hash, Addressable::URI] options_or_uri
diff --git a/spec/lib/http/uri/normalizer_spec.rb b/spec/lib/http/uri/normalizer_spec.rb
new file mode 100644
index 00000000..c8720b2b
--- /dev/null
+++ b/spec/lib/http/uri/normalizer_spec.rb
@@ -0,0 +1,95 @@
+# frozen_string_literal: true
+
+RSpec.describe HTTP::URI::NORMALIZER do
+  describe "scheme" do
+    it "lower-cases scheme" do
+      expect(HTTP::URI::NORMALIZER.call("HttP://example.com").scheme).to eq "http"
+    end
+  end
+
+  describe "hostname" do
+    it "lower-cases hostname" do
+      expect(HTTP::URI::NORMALIZER.call("http://EXAMPLE.com").host).to eq "example.com"
+    end
+
+    it "decodes percent-encoded hostname" do
+      expect(HTTP::URI::NORMALIZER.call("http://ex%61mple.com").host).to eq "example.com"
+    end
+
+    it "removes trailing period in hostname" do
+      expect(HTTP::URI::NORMALIZER.call("http://example.com.").host).to eq "example.com"
+    end
+
+    it "IDN-encodes non-ASCII hostname" do
+      expect(HTTP::URI::NORMALIZER.call("http://exämple.com").host).to eq "xn--exmple-cua.com"
+    end
+  end
+
+  describe "path" do
+    it "ensures path is not empty" do
+      expect(HTTP::URI::NORMALIZER.call("http://example.com").path).to eq "/"
+    end
+
+    it "preserves double slashes in path" do
+      expect(HTTP::URI::NORMALIZER.call("http://example.com//a///b").path).to eq "//a///b"
+    end
+
+    it "resolves single-dot segments in path" do
+      expect(HTTP::URI::NORMALIZER.call("http://example.com/a/./b").path).to eq "/a/b"
+    end
+
+    it "resolves double-dot segments in path" do
+      expect(HTTP::URI::NORMALIZER.call("http://example.com/a/b/../c").path).to eq "/a/c"
+    end
+
+    it "resolves leading double-dot segments in path" do
+      expect(HTTP::URI::NORMALIZER.call("http://example.com/../a/b").path).to eq "/a/b"
+    end
+
+    it "percent-encodes control characters in path" do
+      expect(HTTP::URI::NORMALIZER.call("http://example.com/\x00\x7F\n").path).to eq "/%00%7F%0A"
+    end
+
+    it "percent-encodes space in path" do
+      expect(HTTP::URI::NORMALIZER.call("http://example.com/a b").path).to eq "/a%20b"
+    end
+
+    it "percent-encodes non-ASCII characters in path" do
+      expect(HTTP::URI::NORMALIZER.call("http://example.com/キョ").path).to eq "/%E3%82%AD%E3%83%A7"
+    end
+
+    it "does not percent-encode non-special characters in path" do
+      expect(HTTP::URI::NORMALIZER.call("http://example.com/~.-_!$&()*,;=:@{}").path).to eq "/~.-_!$&()*,;=:@{}"
+    end
+
+    it "preserves escape sequences in path" do
+      expect(HTTP::URI::NORMALIZER.call("http://example.com/%41").path).to eq "/%41"
+    end
+  end
+
+  describe "query" do
+    it "allows no query" do
+      expect(HTTP::URI::NORMALIZER.call("http://example.com").query).to be_nil
+    end
+
+    it "percent-encodes control characters in query" do
+      expect(HTTP::URI::NORMALIZER.call("http://example.com/?\x00\x7F\n").query).to eq "%00%7F%0A"
+    end
+
+    it "percent-encodes space in query" do
+      expect(HTTP::URI::NORMALIZER.call("http://example.com/?a b").query).to eq "a%20b"
+    end
+
+    it "percent-encodes non-ASCII characters in query" do
+      expect(HTTP::URI::NORMALIZER.call("http://example.com?キョ").query).to eq "%E3%82%AD%E3%83%A7"
+    end
+
+    it "does not percent-encode non-special characters in query" do
+      expect(HTTP::URI::NORMALIZER.call("http://example.com/?~.-_!$&()*,;=:@{}?").query).to eq "~.-_!$&()*,;=:@{}?"
+    end
+
+    it "preserves escape sequences in query" do
+      expect(HTTP::URI::NORMALIZER.call("http://example.com/?%41").query).to eq "%41"
+    end
+  end
+end