Skip to content

Commit

Permalink
Verify links in version metadata & homepage (#4062)
Browse files Browse the repository at this point in the history
* Verify links in version metadata & homepage

So they can be displayed distinctively in gem (& eventually user) pages.

* Use beginning of day in verification queries

Using a constant time makes the queries cachable

---------

Co-authored-by: Arlette Thibodeau <[email protected]>
  • Loading branch information
segiddins and arletterocks authored Oct 23, 2023
1 parent 8907e31 commit c3c61e1
Show file tree
Hide file tree
Showing 22 changed files with 714 additions and 50 deletions.
8 changes: 6 additions & 2 deletions app/assets/stylesheets/modules/gem.css
Original file line number Diff line number Diff line change
Expand Up @@ -210,12 +210,16 @@
background-color: #141c22;
-ms-transform: rotate(45deg);
-webkit-transform: rotate(45deg);
transform: rotate(45deg);
}
transform: rotate(45deg); }

.gem__link:before {
margin-right: 16px; }

.gem__link__verified:after {
margin-left: 8px;
font-size: .75rem;
content: "✓"; }

.gem__versions-wrap {
margin-bottom: 70px;
overflow: auto; }
Expand Down
19 changes: 19 additions & 0 deletions app/avo/resources/link_verification_resource.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
class LinkVerificationResource < Avo::BaseResource
self.title = :id
self.includes = []
# self.search_query = -> do
# scope.ransack(id_eq: params[:q], m: "or").result(distinct: false)
# end

field :id, as: :id
# Fields generated from the model
field :linkable, as: :belongs_to,
polymorphic_as: :linkable,
types: [::Rubygem]
field :uri, as: :text
field :verified?, as: :boolean
field :last_verified_at, as: :date_time
field :last_failure_at, as: :date_time
field :failures_since_last_verification, as: :number
# add fields here
end
4 changes: 4 additions & 0 deletions app/controllers/avo/link_verifications_controller.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# This controller has been generated to enable Rails' resource routes.
# More information on https://docs.avohq.io/2.0/controllers.html
class Avo::LinkVerificationsController < Avo::ResourcesController
end
6 changes: 4 additions & 2 deletions app/helpers/rubygems_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ def formatted_licenses(license_names)
end
end

def link_to_page(id, url)
link_to(t("rubygems.aside.links.#{id}"), url, rel: "nofollow", class: %w[gem__link t-list__item], id: id) if url.present?
def link_to_page(id, url, verified: false)
classes = %w[gem__link t-list__item]
classes << "gem__link__verified" if verified
link_to(t("rubygems.aside.links.#{id}"), url, rel: "nofollow", class: classes, id: id) if url.present?
end

def link_to_directory
Expand Down
102 changes: 102 additions & 0 deletions app/jobs/verify_link_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
class VerifyLinkJob < ApplicationJob
queue_as :default

retry_on ActiveRecord::RecordNotFound, ActiveRecord::RecordInvalid, wait: :exponentially_longer, attempts: 3

ERRORS = (HTTP_ERRORS + [Faraday::Error, SocketError, SystemCallError, OpenSSL::SSL::SSLError]).freeze

class NotHTTPSError < StandardError; end
class LinkNotPresentError < StandardError; end
class HTTPResponseError < StandardError; end

discard_on NotHTTPSError do |job, _error|
job.record_failure
end

rescue_from LinkNotPresentError, HTTPResponseError, *ERRORS do |error|
logger.info "Linkback verification failed with error: #{error.message}", error: error, uri: link_verification.uri,
linkable: link_verification.linkable

link_verification.transaction do
record_failure
if should_retry?
retry_job(wait: 5.seconds * (3.5**link_verification.failures_since_last_verification.pred), error:)
else
instrument :retry_stopped, error: error
end
end
end

TIMEOUT_SEC = 5

def perform(link_verification:)
verify_link!(link_verification.uri, link_verification.linkable)
record_success
end

def link_verification
arguments.first.fetch(:link_verification)
end

def verify_link!(uri, linkable)
raise NotHTTPSError unless uri.start_with?("https://")

expected_href = linkable.linkable_verification_uri.to_s.downcase

response = get(uri)
raise HTTPResponseError, "Expected 200, got #{response.status}" unless response.status == 200
# TODO: body_with_limit, https://github.com/mastodon/mastodon/blob/33c8708a1ac7df363bf2bd74ab8fa2ed7168379c/app/lib/request.rb#L246
doc = Nokogiri::HTML5(response.body)

xpaths = [
# rel=me, what mastodon uses for profile link verification
'//a[contains(concat(" ", normalize-space(@rel), " "), " me ")]',
'//link[contains(concat(" ", normalize-space(@rel), " "), " me ")]',

# rel=rubygem
'//a[contains(concat(" ", normalize-space(@rel), " "), " rubygem ")]',
'//link[contains(concat(" ", normalize-space(@rel), " "), " rubygem ")]'
]

if URI(uri).host == "github.com"
# github doesn't set a rel attribute on the URL added to a repo, so we have to use role=link instead
xpaths << '//a[contains(concat(" ", normalize-space(@role), " "), " link ")]'
xpaths << '//link[contains(concat(" ", normalize-space(@role), " "), " link ")]'
end

links = doc.xpath(xpaths.join("|"))

return if links.any? { |link| link["href"]&.downcase == expected_href }
raise LinkNotPresentError, "Expected #{expected_href} to be present in #{uri}"
end

def get(url)
Faraday.new(nil, request: { timeout: TIMEOUT_SEC }) do |f|
f.response :logger, logger, headers: false, errors: true
f.response :raise_error
end.get(
url,
{},
{
"User-Agent" => "RubyGems.org Linkback Verification/#{AppRevision.version}",
"Accept" => "text/html"
}
)
end

def should_retry?
link_verification.failures_since_last_verification < LinkVerification::MAX_FAILURES
end

def record_success
link_verification.update!(
last_verified_at: Time.current,
failures_since_last_verification: 0
)
end

def record_failure
link_verification.touch(:last_failure_at)
link_verification.increment!(:failures_since_last_verification)
end
end
80 changes: 80 additions & 0 deletions app/models/link_verification.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
class LinkVerification < ApplicationRecord
belongs_to :linkable, polymorphic: true

MAX_FAILURES = 10
VALIDITY = 1.month

def self.verified
where(last_verified_at: VALIDITY.ago.beginning_of_day..)
end

def self.unverified
never_verified
.or(last_verified_before(VALIDITY.ago.beginning_of_day))
end

def self.never_verified
where(last_verified_at: nil)
end

def self.last_verified_before(time)
where(last_verified_at: ...time)
end

def self.pending_verification
never_verified
.or(last_verified_before(3.weeks.ago.beginning_of_day))
.where(failures_since_last_verification: 0)
.https_uri
end

def self.https_uri
where(arel_table[:uri].matches("https://%"))
end

def self.linkable(linkable)
where(linkable:)
end

def self.for_uri(uri)
where(uri:)
end

def unverified?
!verified?
end

def verified?
return false unless (verified_at = last_verified_at.presence)

verified_at > VALIDITY.ago
end

def should_verify?
return false unless https?
return false unless failures_since_last_verification <= 0

unverified? || last_verified_at.before?(3.weeks.ago.beginning_of_day)
end

def verify_later
VerifyLinkJob.perform_later(link_verification: self)
end

def retry_if_needed
if previously_new_record? && should_verify?
verify_later
return self
end

return unless https?
return unless failures_since_last_verification.positive? && last_failure_at.present?
return unless last_verified_at.nil? || last_verified_at.before?(last_failure_at)

update!(failures_since_last_verification: 0)
end

def https?
uri.start_with?("https://")
end
end
8 changes: 7 additions & 1 deletion app/models/links.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@ class Links
"docs" => "documentation_uri"
}.freeze

attr_accessor :rubygem, :version, :linkset
attr_accessor :rubygem, :version, :linkset, :link_verifications

def initialize(rubygem, version)
self.rubygem = rubygem
self.version = version
self.linkset = rubygem.linkset
self.link_verifications = rubygem.link_verifications.verified.for_uri(version ? each.map { |_, url| url } : []).strict_loading
end

def links
Expand All @@ -45,6 +46,11 @@ def each
end
end

def verified?(uri)
# intentionally using #any? here because we want to ensure the query is materialized only once to avoid an N+1
link_verifications.any? { |lv| lv.uri == uri }
end

# documentation uri:
# if metadata has it defined, use that
# or if linksets has it defined, use that
Expand Down
7 changes: 7 additions & 0 deletions app/models/linkset.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
class Linkset < ApplicationRecord
belongs_to :rubygem

before_save :create_homepage_link_verification, if: :home_changed?

LINKS = %w[home code docs wiki mail bugs].freeze

LINKS.each do |url|
Expand All @@ -18,4 +20,9 @@ def empty?
def update_attributes_from_gem_specification!(spec)
update!(home: spec.homepage)
end

def create_homepage_link_verification
return if home.blank?
rubygem.link_verifications.find_or_create_by!(uri: home).retry_if_needed
end
end
5 changes: 5 additions & 0 deletions app/models/rubygem.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class Rubygem < ApplicationRecord
has_many :ownership_calls, -> { opened }, dependent: :destroy, inverse_of: :rubygem
has_many :ownership_requests, -> { opened }, dependent: :destroy, inverse_of: :rubygem
has_many :audits, as: :auditable, inverse_of: :auditable
has_many :link_verifications, as: :linkable, inverse_of: :linkable, dependent: :destroy

validates :name,
length: { maximum: Gemcutter::MAX_FIELD_LENGTH },
Expand Down Expand Up @@ -391,6 +392,10 @@ def yank_versions!(version_id: nil)
end
end

def linkable_verification_uri
URI.join("https://rubygems.org/gem/", name)
end

private

# a gem namespace is not protected if it is
Expand Down
9 changes: 9 additions & 0 deletions app/models/version.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class Version < ApplicationRecord # rubocop:disable Metrics/ClassLength
before_validation :set_canonical_number, if: :number_changed?
before_validation :full_nameify!
before_validation :gem_full_nameify!
before_save :create_link_verifications, if: :metadata_changed?
before_save :update_prerelease, if: :number_changed?
# TODO: Remove this once we move to GemDownload only
after_create :create_gem_download
Expand Down Expand Up @@ -496,4 +497,12 @@ def no_dashes_in_version_number
return unless number&.include?("-")
errors.add(:number, "cannot contain a dash (it will be uninstallable)")
end

def create_link_verifications
Links::LINKS.each_value do |long|
uri = metadata[long]
next if uri.blank?
rubygem.link_verifications.find_or_create_by!(uri:).retry_if_needed
end
end
end
10 changes: 10 additions & 0 deletions app/policies/link_verification_policy.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
class LinkVerificationPolicy < ApplicationPolicy
class Scope < Scope
def resolve
scope.all
end
end

def avo_index? = rubygems_org_admin?
def avo_show? = rubygems_org_admin?
end
2 changes: 1 addition & 1 deletion app/views/rubygems/_aside.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
<h3 class="t-list__heading"><%= t '.links.header' %>:</h3>
<div class="t-list__items">
<%- @versioned_links.each do |name, link| %>
<%= link_to_page name, link %>
<%= link_to_page name, link, verified: @versioned_links.verified?(link) %>
<%- end %>
<%= change_diff_link(@rubygem, @latest_version) %>
<%= badge_link(@rubygem) %>
Expand Down
15 changes: 15 additions & 0 deletions db/migrate/20230905152206_create_link_verifications.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
class CreateLinkVerifications < ActiveRecord::Migration[7.0]
def change
create_table :link_verifications do |t|
t.references :linkable, polymorphic: true, null: false
t.string :uri, null: false
t.datetime :last_verified_at, null: true
t.datetime :last_failure_at, null: true
t.integer :failures_since_last_verification, default: 0

t.timestamps

t.index ["linkable_id", "linkable_type", "uri"], name: "index_link_verifications_on_linkable_and_uri"
end
end
end
13 changes: 13 additions & 0 deletions db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,19 @@
t.index ["scheduled_at"], name: "index_good_jobs_on_scheduled_at", where: "(finished_at IS NULL)"
end

create_table "link_verifications", force: :cascade do |t|
t.string "linkable_type", null: false
t.bigint "linkable_id", null: false
t.string "uri", null: false
t.datetime "last_verified_at"
t.datetime "last_failure_at"
t.integer "failures_since_last_verification", default: 0
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["linkable_id", "linkable_type", "uri"], name: "index_link_verifications_on_linkable_and_uri"
t.index ["linkable_type", "linkable_id"], name: "index_link_verifications_on_linkable"
end

create_table "linksets", id: :serial, force: :cascade do |t|
t.integer "rubygem_id"
t.string "home"
Expand Down
Loading

0 comments on commit c3c61e1

Please sign in to comment.