diff --git a/app/models/public_body.rb b/app/models/public_body.rb
index 12dde0457c..25ff55ea8a 100644
--- a/app/models/public_body.rb
+++ b/app/models/public_body.rb
@@ -33,6 +33,7 @@
 require 'confidence_intervals'
 
 class PublicBody < ApplicationRecord
+  include CalculatedHomePage
   include Taggable
   include Notable
   include Rails.application.routes.url_helpers
@@ -402,19 +403,6 @@ def legislation
     legislations.first
   end
 
-  # Guess home page from the request email, or use explicit override, or nil
-  # if not known.
-  #
-  # TODO: PublicBody#calculated_home_page would be a good candidate to cache
-  # in an instance variable
-  def calculated_home_page
-    if home_page && !home_page.empty?
-      home_page[URI.regexp(%w(http https))] ? home_page : "https://#{home_page}"
-    elsif request_email_domain
-      "https://www.#{request_email_domain}"
-    end
-  end
-
   # The "internal admin" is a special body for internal use.
   def self.internal_admin_body
     matching_pbs = AlaveteliLocalization.
diff --git a/app/models/public_body/calculated_home_page.rb b/app/models/public_body/calculated_home_page.rb
new file mode 100644
index 0000000000..afedc45c63
--- /dev/null
+++ b/app/models/public_body/calculated_home_page.rb
@@ -0,0 +1,65 @@
+# Guess the home page based on the request email domain.
+module PublicBody::CalculatedHomePage
+  extend ActiveSupport::Concern
+
+  included do
+    # Email domains that are never used to guess a home page.
+    cattr_accessor :excluded_calculated_home_page_domains, default: %w[
+      aol.com
+      gmail.com
+      googlemail.com
+      gmx.com
+      hotmail.com
+      icloud.com
+      live.com
+      mac.com
+      mail.com
+      mail.ru
+      me.com
+      outlook.com
+      protonmail.com
+      qq.com
+      yahoo.com
+      yandex.com
+      ymail.com
+      zoho.com
+    ]
+  end
+
+  # Returns the explicit home page with a protocol, a guess based on the
+  # request email domain, or nil.
+  #
+  # NOTE: A non-nil result is memoized per instance and is not reset if
+  # home_page or the request email are changed afterwards.
+  def calculated_home_page
+    @calculated_home_page ||= calculated_home_page!
+  end
+
+  private
+
+  # Ensure known home page has a full URL or guess if not known.
+  def calculated_home_page!
+    ensure_home_page_protocol || guess_home_page
+  end
+
+  # Ensure the home page has the HTTP protocol at the start of the URL
+  def ensure_home_page_protocol
+    return unless home_page.present?
+    home_page[URI.regexp(%w(http https))] ? home_page : "https://#{home_page}"
+  end
+
+  # Guess the home page from the request address email domain.
+  def guess_home_page
+    return unless request_email_domain
+    return if excluded_calculated_home_page_domain?(request_email_domain)
+    "https://www.#{request_email_domain}"
+  end
+
+  # Compare case-insensitively so the exclusion does not depend on the
+  # domain or the configured list already being lower-cased.
+  def excluded_calculated_home_page_domain?(domain)
+    excluded_calculated_home_page_domains.any? do |excluded|
+      excluded.casecmp?(domain)
+    end
+  end
+end
diff --git a/spec/models/public_body_spec.rb b/spec/models/public_body_spec.rb
index 6a1e4e9171..a1c8ec0863 100644
--- a/spec/models/public_body_spec.rb
+++ b/spec/models/public_body_spec.rb
@@ -1920,36 +1920,53 @@ def set_default_attributes(public_body)
 end
 
 RSpec.describe PublicBody do
+  around do |example|
+    previous = PublicBody.excluded_calculated_home_page_domains
+    PublicBody.excluded_calculated_home_page_domains = %w[example.net]
+    example.run
+    PublicBody.excluded_calculated_home_page_domains = previous
+  end
 
-  describe "calculated home page" do
-    it "should return the home page verbatim if it's present" do
-      public_body = PublicBody.new
-      public_body.home_page = "http://www.example.com"
-      expect(public_body.calculated_home_page).to eq("http://www.example.com")
+  describe 'calculated home page' do
+    it "returns the home page verbatim if it's present" do
+      public_body = PublicBody.new(home_page: 'http://www.example.com')
+      expect(public_body.calculated_home_page).to eq('http://www.example.com')
     end
 
-    it "should return the home page based on the request email domain if it has one" do
+    it 'returns the home page based on the request email domain if it has one' do
       public_body = PublicBody.new
-      allow(public_body).to receive(:request_email_domain).and_return "public-authority.com"
-      expect(public_body.calculated_home_page).to eq("https://www.public-authority.com")
+
+      allow(public_body).
+        to receive(:request_email_domain).and_return('public-authority.com')
+
+      expect(public_body.calculated_home_page).
+        to eq('https://www.public-authority.com')
     end
 
-    it "should return nil if there's no home page and the email domain can't be worked out" do
+    it "returns nil if there's no home page and the email domain can't be worked out" do
       public_body = PublicBody.new
-      allow(public_body).to receive(:request_email_domain).and_return nil
+      allow(public_body).to receive(:request_email_domain).and_return(nil)
       expect(public_body.calculated_home_page).to be_nil
     end
 
-    it "should ensure home page URLs start with https://" do
-      public_body = PublicBody.new
-      public_body.home_page = "example.com"
-      expect(public_body.calculated_home_page).to eq("https://example.com")
+    it 'ensures home page URLs start with https://' do
+      public_body = PublicBody.new(home_page: 'example.com')
+      expect(public_body.calculated_home_page).to eq('https://example.com')
     end
 
-    it "should not add http when https is present" do
-      public_body = PublicBody.new
-      public_body.home_page = "https://example.com"
-      expect(public_body.calculated_home_page).to eq("https://example.com")
+    it 'does not add http when https is present' do
+      public_body = PublicBody.new(home_page: 'https://example.com')
+      expect(public_body.calculated_home_page).to eq('https://example.com')
+    end
+
+    it 'does not calculate the homepage for excluded domains' do
+      public_body = PublicBody.new(request_email: 'x@example.net')
+      expect(public_body.calculated_home_page).to be_nil
+    end
+
+    it 'ignores case sensitivity for excluded domains' do
+      public_body = PublicBody.new(request_email: 'x@EXAMPLE.net')
+      expect(public_body.calculated_home_page).to be_nil
     end
   end
 