From 470203f8b99c37f30e3640a9a451b84b37194c46 Mon Sep 17 00:00:00 2001 From: Alex Watt Date: Mon, 26 Aug 2024 14:08:03 -0400 Subject: [PATCH] Cache header normalization to reduce object allocation (#789) Co-authored-by: Alexey Zapparov --- .rubocop/rspec.yml | 4 ++ .rubocop_todo.yml | 24 -------- Gemfile | 1 + lib/http/headers.rb | 67 +++++++++------------- lib/http/headers/normalizer.rb | 69 +++++++++++++++++++++++ spec/lib/http/headers/normalizer_spec.rb | 52 +++++++++++++++++ spec/lib/http/redirector_spec.rb | 11 ++-- spec/lib/http/retriable/performer_spec.rb | 2 +- spec/spec_helper.rb | 1 + 9 files changed, 161 insertions(+), 70 deletions(-) create mode 100644 lib/http/headers/normalizer.rb create mode 100644 spec/lib/http/headers/normalizer_spec.rb diff --git a/.rubocop/rspec.yml b/.rubocop/rspec.yml index 51711080..ebf556d3 100644 --- a/.rubocop/rspec.yml +++ b/.rubocop/rspec.yml @@ -1,5 +1,9 @@ RSpec/ExampleLength: CountAsOne: - array + - hash - heredoc - method_call + +RSpec/MultipleExpectations: + Max: 5 diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index 79be39a4..551b5075 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -267,30 +267,6 @@ RSpec/InstanceVariable: RSpec/MessageSpies: EnforcedStyle: receive -# Offense count: 74 -# Configuration parameters: Max. -RSpec/MultipleExpectations: - Exclude: - - 'spec/lib/http/client_spec.rb' - - 'spec/lib/http/connection_spec.rb' - - 'spec/lib/http/features/auto_deflate_spec.rb' - - 'spec/lib/http/headers_spec.rb' - - 'spec/lib/http/options/body_spec.rb' - - 'spec/lib/http/options/features_spec.rb' - - 'spec/lib/http/options/form_spec.rb' - - 'spec/lib/http/options/headers_spec.rb' - - 'spec/lib/http/options/json_spec.rb' - - 'spec/lib/http/options/merge_spec.rb' - - 'spec/lib/http/options/proxy_spec.rb' - - 'spec/lib/http/redirector_spec.rb' - - 'spec/lib/http/response/body_spec.rb' - - 'spec/lib/http/response/parser_spec.rb' - - 'spec/lib/http/retriable/delay_calculator_spec.rb' - - 'spec/lib/http/retriable/performer_spec.rb' - - 'spec/lib/http/uri_spec.rb' - - 'spec/lib/http_spec.rb' - - 'spec/support/http_handling_shared.rb' - # Offense count: 9 # Configuration parameters: AllowSubject, Max. RSpec/MultipleMemoizedHelpers: diff --git a/Gemfile b/Gemfile index dc6363ee..9db61f1b 100644 --- a/Gemfile +++ b/Gemfile @@ -37,6 +37,7 @@ group :test do gem "rspec", "~> 3.10" gem "rspec-its" + gem "rspec-memory" gem "yardstick" end diff --git a/lib/http/headers.rb b/lib/http/headers.rb index 7d48b46f..5f285616 100644 --- a/lib/http/headers.rb +++ b/lib/http/headers.rb @@ -4,6 +4,7 @@ require "http/errors" require "http/headers/mixin" +require "http/headers/normalizer" require "http/headers/known" module HTTP @@ -12,12 +13,32 @@ class Headers extend Forwardable include Enumerable - # Matches HTTP header names when in "Canonical-Http-Format" - CANONICAL_NAME_RE = /\A[A-Z][a-z]*(?:-[A-Z][a-z]*)*\z/ + class << self + # Coerces given `object` into Headers. + # + # @raise [Error] if object can't be coerced + # @param [#to_hash, #to_h, #to_a] object + # @return [Headers] + def coerce(object) + unless object.is_a? self + object = case + when object.respond_to?(:to_hash) then object.to_hash + when object.respond_to?(:to_h) then object.to_h + when object.respond_to?(:to_a) then object.to_a + else raise Error, "Can't coerce #{object.inspect} to Headers" + end + end + + headers = new + object.each { |k, v| headers.add k, v } + headers + end + alias [] coerce - # Matches valid header field name according to RFC. - # @see http://tools.ietf.org/html/rfc7230#section-3.2 - COMPLIANT_NAME_RE = /\A[A-Za-z0-9!#$%&'*+\-.^_`|~]+\z/ + def normalizer + @normalizer ||= Headers::Normalizer.new + end + end # Class constructor. def initialize @@ -194,45 +215,11 @@ def merge(other) dup.tap { |dupped| dupped.merge! other } end - class << self - # Coerces given `object` into Headers. - # - # @raise [Error] if object can't be coerced - # @param [#to_hash, #to_h, #to_a] object - # @return [Headers] - def coerce(object) - unless object.is_a? self - object = case - when object.respond_to?(:to_hash) then object.to_hash - when object.respond_to?(:to_h) then object.to_h - when object.respond_to?(:to_a) then object.to_a - else raise Error, "Can't coerce #{object.inspect} to Headers" - end - end - - headers = new - object.each { |k, v| headers.add k, v } - headers - end - alias [] coerce - end - private # Transforms `name` to canonical HTTP header capitalization - # - # @param [String] name - # @raise [HeaderError] if normalized name does not - # match {HEADER_NAME_RE} - # @return [String] canonical HTTP header name def normalize_header(name) - return name if CANONICAL_NAME_RE.match?(name) - - normalized = name.split(/[\-_]/).each(&:capitalize!).join("-") - - return normalized if COMPLIANT_NAME_RE.match?(normalized) - - raise HeaderError, "Invalid HTTP header field name: #{name.inspect}" + self.class.normalizer.call(name) end # Ensures there is no new line character in the header value diff --git a/lib/http/headers/normalizer.rb b/lib/http/headers/normalizer.rb new file mode 100644 index 00000000..be623079 --- /dev/null +++ b/lib/http/headers/normalizer.rb @@ -0,0 +1,69 @@ +# frozen_string_literal: true + +module HTTP + class Headers + class Normalizer + # Matches HTTP header names when in "Canonical-Http-Format" + CANONICAL_NAME_RE = /\A[A-Z][a-z]*(?:-[A-Z][a-z]*)*\z/ + + # Matches valid header field name according to RFC. + # @see http://tools.ietf.org/html/rfc7230#section-3.2 + COMPLIANT_NAME_RE = /\A[A-Za-z0-9!#$%&'*+\-.^_`|~]+\z/ + + NAME_PARTS_SEPARATOR_RE = /[\-_]/ + + # @private + # Normalized header names cache + class Cache + MAX_SIZE = 200 + + def initialize + @store = {} + end + + def get(key) + @store[key] + end + alias [] get + + def set(key, value) + # Maintain cache size + @store.delete(@store.each_key.first) while MAX_SIZE <= @store.size + + @store[key] = value + end + alias []= set + end + + def initialize + @cache = Cache.new + end + + # Transforms `name` to canonical HTTP header capitalization + def call(name) + name = -name.to_s + value = (@cache[name] ||= -normalize_header(name)) + + value.dup + end + + private + + # Transforms `name` to canonical HTTP header capitalization + # + # @param [String] name + # @raise [HeaderError] if normalized name does not + # match {COMPLIANT_NAME_RE} + # @return [String] canonical HTTP header name + def normalize_header(name) + return name if CANONICAL_NAME_RE.match?(name) + + normalized = name.split(NAME_PARTS_SEPARATOR_RE).each(&:capitalize!).join("-") + + return normalized if COMPLIANT_NAME_RE.match?(normalized) + + raise HeaderError, "Invalid HTTP header field name: #{name.inspect}" + end + end + end +end diff --git a/spec/lib/http/headers/normalizer_spec.rb b/spec/lib/http/headers/normalizer_spec.rb new file mode 100644 index 00000000..09d7889d --- /dev/null +++ b/spec/lib/http/headers/normalizer_spec.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +RSpec.describe HTTP::Headers::Normalizer do + subject(:normalizer) { described_class.new } + + include_context RSpec::Memory + + describe "#call" do + it "normalizes the header" do + expect(normalizer.call("content_type")).to eq "Content-Type" + end + + it "returns a non-frozen string" do + expect(normalizer.call("content_type")).not_to be_frozen + end + + it "evicts the oldest item when cache is full" do + max_headers = (1..described_class::Cache::MAX_SIZE).map { |i| "Header#{i}" } + max_headers.each { |header| normalizer.call(header) } + normalizer.call("New-Header") + cache_store = normalizer.instance_variable_get(:@cache).instance_variable_get(:@store) + expect(cache_store.keys).to eq(max_headers[1..] + ["New-Header"]) + end + + it "retuns mutable strings" do + normalized_headers = Array.new(3) { normalizer.call("content_type") } + + expect(normalized_headers) + .to satisfy { |arr| arr.uniq.size == 1 } + .and(satisfy { |arr| arr.map(&:object_id).uniq.size == normalized_headers.size }) + .and(satisfy { |arr| arr.none?(&:frozen?) }) + end + + it "allocates minimal memory for normalization of the same header" do + normalizer.call("accept") # XXX: Ensure normalizer is pre-allocated + + # On first call it is expected to allocate during normalization + expect { normalizer.call("content_type") }.to limit_allocations( + Array => 1, + MatchData => 1, + String => 6 + ) + + # On subsequent call it is expected to only allocate copy of a cached string + expect { normalizer.call("content_type") }.to limit_allocations( + Array => 0, + MatchData => 0, + String => 1 + ) + end + end +end diff --git a/spec/lib/http/redirector_spec.rb b/spec/lib/http/redirector_spec.rb index e182129a..e5c505cd 100644 --- a/spec/lib/http/redirector_spec.rb +++ b/spec/lib/http/redirector_spec.rb @@ -117,12 +117,13 @@ def redirect_response(status, location, set_cookie = {}) expect(req_cookie).to eq request_cookies.shift hops.shift end + expect(res.to_s).to eq "bar" - cookies = res.cookies.cookies.to_h { |c| [c.name, c.value] } - expect(cookies["foo"]).to eq "42" - expect(cookies["bar"]).to eq "53" - expect(cookies["baz"]).to eq "65" - expect(cookies["deleted"]).to eq nil + expect(res.cookies.cookies.to_h { |c| [c.name, c.value] }).to eq({ + "foo" => "42", + "bar" => "53", + "baz" => "65" + }) end it "returns original cookies in response" do diff --git a/spec/lib/http/retriable/performer_spec.rb b/spec/lib/http/retriable/performer_spec.rb index 60c0e12c..38d693c2 100644 --- a/spec/lib/http/retriable/performer_spec.rb +++ b/spec/lib/http/retriable/performer_spec.rb @@ -199,7 +199,7 @@ def response(**options) end describe "should_retry option" do - it "decides if the request should be retried" do + it "decides if the request should be retried" do # rubocop:disable RSpec/MultipleExpectations retry_proc = proc do |req, err, res, attempt| expect(req).to eq request if res diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 1bb40781..1bdbf00b 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -5,6 +5,7 @@ require "http" require "rspec/its" +require "rspec/memory" require "support/capture_warning" require "support/fakeio"