Skip to content

Commit

Permalink
Elasticsearch indexing with elasticsearch-(model|rails)
Browse files Browse the repository at this point in the history
  • Loading branch information
dbackeus committed May 2, 2023
1 parent aad6aed commit f91dc9e
Show file tree
Hide file tree
Showing 6 changed files with 139 additions and 0 deletions.
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ gem "rails", github: "rails/rails", branch: "main"

gem "bootsnap", require: false
gem "dotenv-rails"
gem "elasticsearch-model"
gem "elasticsearch-rails"
gem "importmap-rails"
gem "opengraph_parser"
gem "pg"
Expand Down
42 changes: 42 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,46 @@ GEM
dotenv-rails (2.8.1)
dotenv (= 2.8.1)
railties (>= 3.2)
elasticsearch (7.17.7)
elasticsearch-api (= 7.17.7)
elasticsearch-transport (= 7.17.7)
elasticsearch-api (7.17.7)
multi_json
elasticsearch-model (7.2.1)
activesupport (> 3)
elasticsearch (~> 7)
hashie
elasticsearch-rails (7.2.1)
elasticsearch-transport (7.17.7)
faraday (~> 1)
multi_json
erubi (1.12.0)
faraday (1.10.3)
faraday-em_http (~> 1.0)
faraday-em_synchrony (~> 1.0)
faraday-excon (~> 1.1)
faraday-httpclient (~> 1.0)
faraday-multipart (~> 1.0)
faraday-net_http (~> 1.0)
faraday-net_http_persistent (~> 1.0)
faraday-patron (~> 1.0)
faraday-rack (~> 1.0)
faraday-retry (~> 1.0)
ruby2_keywords (>= 0.0.4)
faraday-em_http (1.0.0)
faraday-em_synchrony (1.0.0)
faraday-excon (1.1.0)
faraday-httpclient (1.0.1)
faraday-multipart (1.0.4)
multipart-post (~> 2)
faraday-net_http (1.0.1)
faraday-net_http_persistent (1.2.0)
faraday-patron (1.0.0)
faraday-rack (1.0.0)
faraday-retry (1.0.3)
globalid (1.1.0)
activesupport (>= 5.0)
hashie (5.0.0)
i18n (1.12.0)
concurrent-ruby (~> 1.0)
importmap-rails (1.1.5)
Expand All @@ -134,6 +171,8 @@ GEM
mini_mime (1.1.2)
minitest (5.18.0)
msgpack (1.7.0)
multi_json (1.15.0)
multipart-post (2.3.0)
net-imap (0.3.4)
date
net-protocol
Expand Down Expand Up @@ -183,6 +222,7 @@ GEM
connection_pool
reline (0.3.3)
io-console (~> 0.5)
ruby2_keywords (0.0.5)
sidekiq (7.0.7)
concurrent-ruby (< 2)
connection_pool (>= 2.3.0)
Expand Down Expand Up @@ -217,6 +257,8 @@ PLATFORMS
DEPENDENCIES
bootsnap
dotenv-rails
elasticsearch-model
elasticsearch-rails
importmap-rails
opengraph_parser
pg
Expand Down
69 changes: 69 additions & 0 deletions app/models/concerns/searchable.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Description:
# Include this module in models that should be searchable. Also add the model class
# to the Searchable::MODELS array (we're avoiding metaprogramming to keep track of
# included models since it isn't reliable in lazy loading environments).
#
# Models including the concern are expected to implement 'title' and 'searchable_content'
# for indexing. Title will be given higher priority compared to content.
#
# After adding and indexing your models, search across all models can be performed via
# the Searchable.search method.

# Mixin that keeps a model's Elasticsearch document in sync with its database
# row and exposes a search entry point spanning every model in MODELS.
#
# Including classes must respond to `title` and implement `searchable_content`;
# they may override `should_index?` to keep some records out of the index.
module Searchable
  extend ActiveSupport::Concern

  # Every class that includes this concern; the `included` hook below raises
  # for a class that is missing from this list.
  # NOTE(review): evaluating this line references Link and Post while this
  # module body is still running, i.e. before the `included` block has been
  # registered - confirm both models still get their callbacks when autoloaded.
  MODELS = [Link, Post].freeze

  # Runs a multi_match query against the indexes of all MODELS, weighting
  # `title` twice as much as `content`.
  #
  # query - the search text handed to Elasticsearch as the multi_match query.
  #
  # Returns the `records` of the Elasticsearch::Model search response.
  def self.search(query)
    search_definition = {
      query: {
        multi_match: {
          query: query,
          fields: ["title^2", "content"],
        },
      },
    }

    Elasticsearch::Model.search(search_definition, MODELS).records
  end

  # Document body stored in Elasticsearch: the model's `title` plus whatever
  # `searchable_content` returns, stored under `content`.
  def as_indexed_json(_options = {})
    {
      title: title,
      content: searchable_content,
    }
  end

  # Whether this record belongs in the index. Defaults to true; including
  # classes override it to exclude records.
  def should_index?
    true
  end

  # Must be overridden by the including class to return the text indexed as
  # `content`.
  #
  # Raises NotImplementedError when the including class does not override it.
  def searchable_content
    raise NotImplementedError, "#{self.class.name} must implement #searchable_content"
  end

  included do
    include Elasticsearch::Model

    raise "add #{name} to Searchable::MODELS" unless Searchable::MODELS.include?(self)

    # Always (re)index the full document, on update as well as on create.
    # `update_document` only sends the as_indexed_json keys whose names match
    # changed database attributes; `content` is derived from a differently
    # named column (e.g. Post#body), so a partial update would never refresh it.
    # A record that only becomes indexable on update has no document to
    # partially update either.
    after_commit on: %i[create update], if: :should_index? do
      __elasticsearch__.index_document
    end

    # A record that stopped being indexable must not stay searchable. The
    # document may never have existed, so a 404 from Elasticsearch is ignored.
    after_commit on: :update, unless: :should_index? do
      __elasticsearch__.delete_document(ignore: 404)
    end

    # Records that were never indexed (should_index? was false) have no
    # document; ignoring 404 keeps destroying them from raising.
    after_commit on: :destroy do
      __elasticsearch__.delete_document(ignore: 404)
    end

    settings index: { number_of_shards: 1 } do
      mappings dynamic: "false" do
        # NOTE(review): mapping-level `boost` is deprecated in recent
        # Elasticsearch versions and Searchable.search already applies
        # `title^2` - confirm both are intended.
        indexes :title, analyzer: "english", boost: 2
        indexes :content, analyzer: "english"
      end
    end
  end
end
10 changes: 10 additions & 0 deletions app/models/link.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
class Link < ApplicationRecord
include Searchable

validates_presence_of :url
validate :validate_format_of_url
validates_inclusion_of :state, in: %w[pending success error]
Expand All @@ -8,6 +10,14 @@ class Link < ApplicationRecord

private

# Only links in the "success" state are indexed. `state` is the attribute this
# model validates against pending/success/error; the previous `status` lookup
# does not match any attribute visible on the model.
def should_index?
  state == "success"
end

# Text indexed as `content` for a link: its `description`.
def searchable_content
description
end

# Hands this link's id to CrawlLinkJob via `perform_later`.
# NOTE(review): the callback that triggers this method is outside the visible
# part of the file - confirm when it runs.
def enqueue_crawl_job
CrawlLinkJob.perform_later(id)
end
Expand Down
8 changes: 8 additions & 0 deletions app/models/post.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
# Record with a mandatory title and body, indexed for search through the
# Searchable concern.
class Post < ApplicationRecord
  include Searchable

  validates :title, presence: true
  validates :body, presence: true

  private

  # Text indexed as `content` by Searchable#as_indexed_json: the post body.
  def searchable_content
    body
  end
end
8 changes: 8 additions & 0 deletions config/initializers/elasticsearch.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# https://github.com/elastic/elasticsearch-ruby/issues/1429#issuecomment-958162468
module Elasticsearch
  # Reopens the client class to override a single method.
  class Client
    # Records the client as verified unconditionally; every argument is
    # accepted and ignored.
    #
    # Returns true.
    def verify_with_version_or_header(*)
      @verified = true
    end
  end
end

0 comments on commit f91dc9e

Please sign in to comment.