From 3826c87803f644325f88a33fa577745648edb269 Mon Sep 17 00:00:00 2001
From: David Backeus
Date: Tue, 2 May 2023 16:54:07 +0200
Subject: [PATCH] DIY Elasticsearch

---
 app/controllers/search_controller.rb     |   7 +
 app/jobs/elasticsearch_delete_job.rb     |   7 +
 app/jobs/elasticsearch_index_job.rb      |  13 ++
 app/models/concerns/elasticsearchable.rb |  39 ++++++
 app/models/elasticsearch.rb              | 156 +++++++++++++++++++++++
 app/models/link.rb                       |  10 ++
 app/models/post.rb                       |   6 +
 app/views/layouts/application.html.erb   |   1 +
 app/views/search/index.html.erb          |  16 +++
 config/routes.rb                         |   3 +-
 10 files changed, 257 insertions(+), 1 deletion(-)
 create mode 100644 app/controllers/search_controller.rb
 create mode 100644 app/jobs/elasticsearch_delete_job.rb
 create mode 100644 app/jobs/elasticsearch_index_job.rb
 create mode 100644 app/models/concerns/elasticsearchable.rb
 create mode 100644 app/models/elasticsearch.rb
 create mode 100644 app/views/search/index.html.erb

diff --git a/app/controllers/search_controller.rb b/app/controllers/search_controller.rb
new file mode 100644
index 0000000..cec1619
--- /dev/null
+++ b/app/controllers/search_controller.rb
@@ -0,0 +1,7 @@
+# Renders the search page (routed as GET /search).
+class SearchController < ApplicationController
+  # Exposes the records matching params[:query] to the view as @results.
+  def index
+    @results = Elasticsearch.search(params[:query])
+  end
+end
diff --git a/app/jobs/elasticsearch_delete_job.rb b/app/jobs/elasticsearch_delete_job.rb
new file mode 100644
index 0000000..eac2277
--- /dev/null
+++ b/app/jobs/elasticsearch_delete_job.rb
@@ -0,0 +1,7 @@
+# Removes a single document from the Elasticsearch index in the background.
+class ElasticsearchDeleteJob < ApplicationJob
+  # elasticsearch_id - document id, as built by Elasticsearchable#elasticsearch_id.
+  def perform(elasticsearch_id)
+    Elasticsearch.delete(elasticsearch_id)
+  end
+end
diff --git a/app/jobs/elasticsearch_index_job.rb b/app/jobs/elasticsearch_index_job.rb
new file mode 100644
index 0000000..34ab6ab
--- /dev/null
+++ b/app/jobs/elasticsearch_index_job.rb
@@ -0,0 +1,13 @@
+# Indexes (or re-indexes) a single record in Elasticsearch in the background.
+class ElasticsearchIndexJob < ApplicationJob
+  # klass - name of the record's class, as a String
+  # id    - id of the record
+  #
+  # Does nothing when the record no longer exists by the time the job runs.
+  def perform(klass, id)
+    model = klass.constantize.find_by(id: id)
+    return unless model
+
+    Elasticsearch.index(model)
+  end
+end
diff --git a/app/models/concerns/elasticsearchable.rb b/app/models/concerns/elasticsearchable.rb
new file mode 100644
index 0000000..200b842
--- /dev/null
+++ b/app/models/concerns/elasticsearchable.rb
@@ -0,0 +1,39 @@
+# Keeps a model's Elasticsearch document in sync via after_commit callbacks.
+#
+# Including models must implement #elasticsearch_content; #elasticsearch_title
+# defaults to #title.
+module Elasticsearchable
+  extend ActiveSupport::Concern
+
+  included do
+    # Enqueue (re)indexing once a create or update has been committed.
+    after_commit on: %i[create update], if: :should_index? do
+      ElasticsearchIndexJob.perform_later(self.class.name, id)
+    end
+
+    # Enqueue removal of the document once a destroy has been committed.
+    after_commit on: :destroy do
+      ElasticsearchDeleteJob.perform_later(elasticsearch_id)
+    end
+  end
+
+  # Override this method to control when the model should be indexed
+  def should_index?
+    true
+  end
+
+  # Document id used in the index: "<ClassName>-<id>".
+  def elasticsearch_id
+    "#{self.class.name}-#{id}"
+  end
+
+  # Value indexed as the document title.
+  def elasticsearch_title
+    title
+  end
+
+  # Value indexed as the document content; including models must override this.
+  def elasticsearch_content
+    raise NotImplementedError, "#{self.class.name} must implement #elasticsearch_content"
+  end
+end
diff --git a/app/models/elasticsearch.rb b/app/models/elasticsearch.rb
new file mode 100644
index 0000000..11f8d90
--- /dev/null
+++ b/app/models/elasticsearch.rb
@@ -0,0 +1,156 @@
+require "json"
+require "net/http"
+
+# Minimal Elasticsearch integration: keeps one document per record in a single
+# index and searches across all of them.
+module Elasticsearch
+  INDEX = "searchables".freeze
+
+  # Creates or replaces the document for the given record.
+  #
+  # active_record_instance - record responding to the Elasticsearchable methods
+  #                          plus updated_at / created_at.
+  def self.index(active_record_instance)
+    connection_pool.with do |client|
+      client.index(
+        INDEX,
+        active_record_instance.elasticsearch_id,
+        title: active_record_instance.elasticsearch_title,
+        content: active_record_instance.elasticsearch_content,
+        updated_at: active_record_instance.updated_at,
+        created_at: active_record_instance.created_at,
+      )
+    end
+  end
+
+  # Deletes the document with the given id from the index.
+  def self.delete(id)
+    connection_pool.with do |client|
+      client.delete(INDEX, id)
+    end
+  end
+
+  # Full-text search over title (boosted x2) and content.
+  #
+  # Returns the matching records in the order Elasticsearch returned the hits,
+  # or [] when query is blank. Hits without a matching database row are dropped.
+  def self.search(query)
+    return [] if query.blank?
+
+    result = connection_pool.with do |client|
+      client.search(
+        INDEX,
+        _source: false,
+        stored_fields: %w[_id],
+        query: {
+          multi_match: {
+            query: query,
+            fields: %w[title^2 content],
+          },
+        },
+      )
+    end
+
+    document_ids = result.fetch(:hits).fetch(:hits).map { |hit| hit.fetch(:_id) }
+
+    # Document ids look like "<ClassName>-<id>". Split on the first dash only so
+    # record ids that contain dashes themselves (e.g. UUIDs) stay intact.
+    record_ids_by_class_name =
+      document_ids.each_with_object({}) do |document_id, grouped|
+        class_name, record_id = document_id.split("-", 2)
+        (grouped[class_name] ||= []) << record_id
+      end
+
+    records = record_ids_by_class_name.flat_map do |class_name, record_ids|
+      class_name.constantize.where(id: record_ids)
+    end
+
+    # The per-class queries above do not preserve the hit order, so restore it.
+    position_by_document_id = document_ids.each_with_index.to_h
+    records.sort_by { |record| position_by_document_id[record.elasticsearch_id] }
+  end
+
+  # Memoized pool of Clients, sized by RAILS_MAX_THREADS (default 5).
+  def self.connection_pool
+    @connection_pool ||= ConnectionPool.new(size: ENV.fetch("RAILS_MAX_THREADS", 5).to_i, timeout: 5) do
+      Client.new
+    end
+  end
+
+  # Small JSON-over-HTTP client for the Elasticsearch endpoints used above.
+  class Client
+    HttpError = Class.new(StandardError)
+
+    REQUEST_METHOD_TO_CLASS = {
+      get: Net::HTTP::Get,
+      post: Net::HTTP::Post,
+      put: Net::HTTP::Put,
+      delete: Net::HTTP::Delete,
+    }.freeze
+
+    def initialize
+      # Drop a trailing slash so request paths never start with "//".
+      @url = ENV.fetch("ELASTICSEARCH_URL", "http://localhost:9200").chomp("/")
+    end
+
+    # https://www.elastic.co/guide/en/elasticsearch/reference/7.17/docs-index_.html#docs-index-api-request
+    def index(index, id, document)
+      request(:put, "#{index}/_doc/#{id}", document)
+    end
+
+    # https://www.elastic.co/guide/en/elasticsearch/reference/7.17/docs-delete.html#docs-delete-api-request
+    def delete(index, id)
+      request(:delete, "#{index}/_doc/#{id}")
+    end
+
+    # Search API reference:
+    # https://www.elastic.co/guide/en/elasticsearch/reference/7.17/search-search.html#search-search
+    # Query body reference:
+    # https://www.elastic.co/guide/en/elasticsearch/reference/7.17/search-search.html#search-search-api-request-body
+    def search(index, query)
+      request(:get, "#{index}/_search", query)
+    end
+
+    # Sends a JSON request and returns the parsed response.
+    #
+    # method - key of REQUEST_METHOD_TO_CLASS (:get, :post, :put or :delete)
+    # path   - path relative to the configured Elasticsearch URL
+    # params - optional object sent as the JSON request body
+    #
+    # Returns the response body parsed with symbolized keys, or nil when the
+    # body is blank. Raises HttpError unless the response is a Net::HTTPSuccess,
+    # and KeyError for an unknown method.
+    def request(method, path, params = nil)
+      uri = URI("#{@url}/#{path}")
+
+      http_request = REQUEST_METHOD_TO_CLASS.fetch(method).new(uri)
+      http_request.content_type = "application/json"
+      http_request.body = params&.to_json
+
+      # Block form: the message is only built when debug logging is enabled.
+      Rails.logger.debug do
+        "[Elasticsearch/request] #{http_request.method} #{http_request.uri} #{http_request.body}"
+      end
+
+      response = connection.request(http_request)
+
+      Rails.logger.debug { "[Elasticsearch/response] #{response.code}, body: #{response.body}" }
+
+      raise HttpError, "status: #{response.code}, body: #{response.body}" unless response.is_a?(Net::HTTPSuccess)
+
+      JSON.parse(response.body, symbolize_names: true) if response.body.present?
+    end
+
+    private
+
+    # Memoized Net::HTTP object for the configured URL (TLS when the scheme is https).
+    def connection
+      @connection ||= begin
+        uri = URI.parse(@url)
+        http = Net::HTTP.new(uri.host, uri.port)
+        http.use_ssl = uri.scheme == "https"
+        http
+      end
+    end
+  end
+end
diff --git a/app/models/link.rb b/app/models/link.rb
index a3d9338..ecdba8c 100644
--- a/app/models/link.rb
+++ b/app/models/link.rb
@@ -1,4 +1,6 @@
 class Link < ApplicationRecord
+  include Elasticsearchable
+
   validates_presence_of :url
   validate :validate_format_of_url
   validates_inclusion_of :state, in: %w[pending success error]
@@ -6,8 +8,16 @@ class Link < ApplicationRecord
   after_create_commit :enqueue_crawl_job
   after_update_commit -> { broadcast_replace_later_to "links", target: "link_#{id}" }
 
+  def elasticsearch_content
+    description
+  end
+
   private
 
+  def should_index?
+    state == "success"
+  end
+
   def enqueue_crawl_job
     CrawlLinkJob.perform_later(id)
   end
diff --git a/app/models/post.rb b/app/models/post.rb
index 428b052..b1efb1d 100644
--- a/app/models/post.rb
+++ b/app/models/post.rb
@@ -1,4 +1,10 @@
 class Post < ApplicationRecord
+  include Elasticsearchable
+
   validates_presence_of :title
   validates_presence_of :body
+
+  def elasticsearch_content
+    body
+  end
 end
diff --git a/app/views/layouts/application.html.erb b/app/views/layouts/application.html.erb
index 79f29b8..06e5cdb 100644
--- a/app/views/layouts/application.html.erb
+++ b/app/views/layouts/application.html.erb
@@ -14,6 +14,7 @@
 <%= yield %>
diff --git a/app/views/search/index.html.erb b/app/views/search/index.html.erb
new file mode 100644
index 0000000..d8f6087
--- /dev/null
+++ b/app/views/search/index.html.erb
@@ -0,0 +1,16 @@
+

Search

+ +<%= form_with url: search_path, method: :get do |form| %> + <%= form.label :query %> + <%= form.text_field :query, value: params[:query], autofocus: true %> + <%= form.submit "Search" %> +<% end %> + +<% if @results.present? %> +

Results

+ +<% end %> \ No newline at end of file diff --git a/config/routes.rb b/config/routes.rb index 82f0548..c08d83d 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -1,7 +1,8 @@ Rails.application.routes.draw do resources :links resources :posts - # Define your application routes per the DSL in https://guides.rubyonrails.org/routing.html + + get "search" => "search#index", as: :search # Reveal health status on /up that returns 200 if the app boots with no exceptions, otherwise 500. # Can be used by load balancers and uptime monitors to verify that the app is live.