diff --git a/Gemfile b/Gemfile index 5a25838..d5ae2db 100644 --- a/Gemfile +++ b/Gemfile @@ -7,6 +7,8 @@ gem "rails", github: "rails/rails", branch: "main" gem "bootsnap", require: false gem "dotenv-rails" +gem "elasticsearch-model" +gem "elasticsearch-rails" gem "importmap-rails" gem "opengraph_parser" gem "pg" diff --git a/Gemfile.lock b/Gemfile.lock index a22d4ab..7eb606c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -111,9 +111,46 @@ GEM dotenv-rails (2.8.1) dotenv (= 2.8.1) railties (>= 3.2) + elasticsearch (7.17.7) + elasticsearch-api (= 7.17.7) + elasticsearch-transport (= 7.17.7) + elasticsearch-api (7.17.7) + multi_json + elasticsearch-model (7.2.1) + activesupport (> 3) + elasticsearch (~> 7) + hashie + elasticsearch-rails (7.2.1) + elasticsearch-transport (7.17.7) + faraday (~> 1) + multi_json erubi (1.12.0) + faraday (1.10.3) + faraday-em_http (~> 1.0) + faraday-em_synchrony (~> 1.0) + faraday-excon (~> 1.1) + faraday-httpclient (~> 1.0) + faraday-multipart (~> 1.0) + faraday-net_http (~> 1.0) + faraday-net_http_persistent (~> 1.0) + faraday-patron (~> 1.0) + faraday-rack (~> 1.0) + faraday-retry (~> 1.0) + ruby2_keywords (>= 0.0.4) + faraday-em_http (1.0.0) + faraday-em_synchrony (1.0.0) + faraday-excon (1.1.0) + faraday-httpclient (1.0.1) + faraday-multipart (1.0.4) + multipart-post (~> 2) + faraday-net_http (1.0.1) + faraday-net_http_persistent (1.2.0) + faraday-patron (1.0.0) + faraday-rack (1.0.0) + faraday-retry (1.0.3) globalid (1.1.0) activesupport (>= 5.0) + hashie (5.0.0) i18n (1.12.0) concurrent-ruby (~> 1.0) importmap-rails (1.1.5) @@ -134,6 +171,8 @@ GEM mini_mime (1.1.2) minitest (5.18.0) msgpack (1.7.0) + multi_json (1.15.0) + multipart-post (2.3.0) net-imap (0.3.4) date net-protocol @@ -183,6 +222,7 @@ GEM connection_pool reline (0.3.3) io-console (~> 0.5) + ruby2_keywords (0.0.5) sidekiq (7.0.7) concurrent-ruby (< 2) connection_pool (>= 2.3.0) @@ -217,6 +257,8 @@ PLATFORMS DEPENDENCIES bootsnap dotenv-rails + elasticsearch-model + 
elasticsearch-rails importmap-rails opengraph_parser pg
diff --git a/app/controllers/search_controller.rb b/app/controllers/search_controller.rb
new file mode 100644
index 0000000..8562ae6
--- /dev/null
+++ b/app/controllers/search_controller.rb
@@ -0,0 +1,7 @@
+# Renders the search page and, when a query is present, the matching records.
+class SearchController < ApplicationController
+  # Looks up params[:query] through Searchable.search and exposes it as @results.
+  def index
+    @results = Searchable.search(params[:query])
+  end
+end
diff --git a/app/jobs/indexer_job.rb b/app/jobs/indexer_job.rb
new file mode 100644
index 0000000..2c8f7ed
--- /dev/null
+++ b/app/jobs/indexer_job.rb
@@ -0,0 +1,32 @@
+# Applies a single create, update or delete to a model's Elasticsearch index.
+class IndexerJob < ApplicationJob
+  # @param operation [String] "create", "update" or "delete"
+  # @param klass [String] model class name, resolved with constantize
+  # @param id [Object] id of the record (also used as the document id on delete)
+  # @raise [ArgumentError] for any other operation
+  def perform(operation, klass, id)
+    klass = klass.constantize
+
+    case operation
+    when "create"
+      # find_by returns nil when no such record exists; there is nothing to index then.
+      model = klass.find_by(id: id)
+      return unless model
+
+      model.__elasticsearch__.index_document
+    when "update"
+      model = klass.find_by(id: id)
+      return unless model
+
+      model.__elasticsearch__.update_document
+    when "delete"
+      begin
+        klass.__elasticsearch__.client.delete(index: klass.index_name, id: id)
+      rescue Elasticsearch::Transport::Transport::Errors::NotFound
+        # No document with this id is indexed, so there is nothing to remove.
+      end
+    else
+      raise ArgumentError, "Unknown operation '#{operation}'"
+    end
+  end
+end
diff --git a/app/models/concerns/searchable.rb b/app/models/concerns/searchable.rb
new file mode 100644
index 0000000..e6864b6
--- /dev/null
+++ b/app/models/concerns/searchable.rb
@@ -0,0 +1,84 @@
+# Description:
+# Include this module in models that should be searchable.
+#
+# Models including the concern are expected to implement 'title' and 'searchable_content'
+# for indexing. Title will be given higher priority compared to content.
+#
+# After adding and indexing your models, search across all models can be performed via
+# the Searchable.search method.
+
+module Searchable
+  extend ActiveSupport::Concern
+
+  # Classes that include this concern; each one appends itself on inclusion.
+  mattr_accessor :models
+  self.models = []
+
+  # Searches title and content across every class in Searchable.models.
+  #
+  # @param query [String, nil] search terms
+  # @return [] when query is blank, otherwise the records of the search response
+  def self.search(query)
+    return [] if query.blank?
+
+    search_definition = {
+      query: {
+        multi_match: {
+          query: query,
+          # The ^2 suffix boosts matches in title over matches in content.
+          fields: ["title^2", "content"],
+        },
+      },
+    }
+
+    Elasticsearch::Model.search(search_definition, models).records
+  end
+
+  # Builds the document that is indexed for this record.
+  def as_indexed_json(_options = {})
+    {
+      title: title,
+      content: searchable_content,
+    }
+  end
+
+  # Whether the record should be in the index. Defaults to true; override to restrict.
+  def should_index?
+    true
+  end
+
+  # Text indexed as content. Including models must override this.
+  def searchable_content
+    raise NotImplementedError, "#{self.class.name} must implement #searchable_content"
+  end
+
+  included do
+    include Elasticsearch::Model
+
+    Searchable.models << self
+
+    after_commit on: :create, if: :should_index? do
+      IndexerJob.perform_later("create", self.class.name, id)
+    end
+
+    # A record that stops satisfying should_index? has its document deleted
+    # instead of being left in the index with outdated content.
+    after_commit on: :update do
+      operation = should_index? ? "update" : "delete"
+      IndexerJob.perform_later(operation, self.class.name, id)
+    end
+
+    after_commit on: :destroy do
+      IndexerJob.perform_later("delete", self.class.name, id)
+    end
+
+    settings index: { number_of_shards: 1 } do
+      mappings dynamic: "false" do
+        # Title is boosted at query time (title^2 in Searchable.search), so the
+        # mapping itself carries no boost.
+        indexes :title, analyzer: "english"
+        indexes :content, analyzer: "english"
+      end
+    end
+  end
+end
diff --git a/app/models/link.rb b/app/models/link.rb
index a3d9338..c754897 100644
--- a/app/models/link.rb
+++ b/app/models/link.rb
@@ -1,4 +1,6 @@
 class Link < ApplicationRecord
+  include Searchable
+
   validates_presence_of :url
   validate :validate_format_of_url
   validates_inclusion_of :state, in: %w[pending success error]
@@ -8,6 +10,16 @@ class Link < ApplicationRecord

   private

+  # Only links whose state is "success" are indexed.
+  def should_index?
+    state == "success"
+  end
+
+  # A link's description is what gets indexed as content.
+  def searchable_content
+    description
+  end
+
   def enqueue_crawl_job
     CrawlLinkJob.perform_later(id)
   end
diff --git a/app/models/post.rb b/app/models/post.rb
index 428b052..b256756 100644
--- a/app/models/post.rb
+++ b/app/models/post.rb
@@ -1,4 +1,13 @@
 class Post < ApplicationRecord
+  include Searchable
+
   validates_presence_of :title
   validates_presence_of :body
+
+  private
+
+  # A post's body is what gets indexed as content.
+  def searchable_content
+    body
+  end
 end
diff --git a/app/views/layouts/application.html.erb b/app/views/layouts/application.html.erb
index 79f29b8..06e5cdb 100644
--- a/app/views/layouts/application.html.erb
+++ b/app/views/layouts/application.html.erb
@@ -14,6 +14,7 @@ <%= yield %>
diff --git a/app/views/search/index.html.erb b/app/views/search/index.html.erb
new file mode 100644
index 0000000..d8f6087
--- /dev/null
+++ b/app/views/search/index.html.erb
@@ -0,0 +1,16 @@
+

Search

+ +<%= form_with url: search_path, method: :get do |form| %> + <%= form.label :query %> + <%= form.text_field :query, value: params[:query], autofocus: true %> + <%= form.submit "Search" %> +<% end %> + +<% if @results.present? %> +

Results

+ +<% end %> \ No newline at end of file diff --git a/config/environments/development.rb b/config/environments/development.rb index b4663e6..7273037 100644 --- a/config/environments/development.rb +++ b/config/environments/development.rb @@ -9,7 +9,7 @@ config.enable_reloading = true # Do not eager load code on boot. - config.eager_load = false + config.eager_load = true # necessary for inclusion tracking of Searchable concern # Show full error reports. config.consider_all_requests_local = true diff --git a/config/environments/test.rb b/config/environments/test.rb index 65be650..88e024d 100644 --- a/config/environments/test.rb +++ b/config/environments/test.rb @@ -15,7 +15,7 @@ # this is usually not necessary, and can slow down your test suite. However, it's # recommended that you enable it in continuous integration systems to ensure eager # loading is working properly before deploying your code. - config.eager_load = ENV["CI"].present? + config.eager_load = true # necessary for inclusion tracking of Searchable concern # Configure public file server for tests with Cache-Control for performance. 
config.public_file_server.enabled = true
diff --git a/config/initializers/elasticsearch.rb b/config/initializers/elasticsearch.rb
new file mode 100644
index 0000000..66f55c4
--- /dev/null
+++ b/config/initializers/elasticsearch.rb
@@ -0,0 +1,10 @@
+# Reopens Elasticsearch::Client so that verify_with_version_or_header only
+# marks the client as verified.
+# See https://github.com/elastic/elasticsearch-ruby/issues/1429#issuecomment-958162468
+module Elasticsearch
+  class Client
+    def verify_with_version_or_header(*)
+      @verified = true
+    end
+  end
+end
diff --git a/config/routes.rb b/config/routes.rb
index 82f0548..c08d83d 100644
--- a/config/routes.rb
+++ b/config/routes.rb
@@ -1,7 +1,9 @@
 Rails.application.routes.draw do
   resources :links
   resources :posts
-  # Define your application routes per the DSL in https://guides.rubyonrails.org/routing.html
+
+  # Search page, handled by SearchController#index.
+  get "search", to: "search#index", as: :search

   # Reveal health status on /up that returns 200 if the app boots with no exceptions, otherwise 500.
   # Can be used by load balancers and uptime monitors to verify that the app is live.