Skip to content

Commit

Permalink
MigrateHTML helper for importing HTML into content
Browse files Browse the repository at this point in the history
This tool is in an early alpha state and requires further work and
documentation before it's ready for general deployment.
  • Loading branch information
sfnelson committed Mar 8, 2024
1 parent 89dfc0f commit a41272f
Show file tree
Hide file tree
Showing 5 changed files with 307 additions and 0 deletions.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ end

group :test do
gem "capybara"
gem "compare-xml"
gem "cuprite"
gem "faker"
gem "rails-controller-testing"
Expand Down
3 changes: 3 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ GEM
rack-test (>= 0.6.3)
regexp_parser (>= 1.5, < 3.0)
xpath (~> 3.2)
compare-xml (0.66)
nokogiri (~> 1.8)
concurrent-ruby (1.2.3)
connection_pool (2.4.1)
crass (1.0.6)
Expand Down Expand Up @@ -369,6 +371,7 @@ PLATFORMS

DEPENDENCIES
capybara
compare-xml
cuprite
dartsass-rails
erb_lint
Expand Down
133 changes: 133 additions & 0 deletions app/actions/katalyst/content/migrate_html.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# frozen_string_literal: true

module Katalyst
  module Content
    # Imports an HTML fragment into a model's content items.
    #
    # Root-level nodes of the fragment are mapped as follows:
    # * +h2+ builds a Section item (depth 0); items built afterwards use depth 1.
    # * +h3+ builds a Content item whose heading is the tag's text.
    # * tags listed in SUPPORTED_ROOT_TAGS are appended, as HTML, to the most
    #   recently built Content item (one is built on demand if the last item
    #   built is not a Content item).
    # * anything else is recorded as an error on this object.
    #
    # After the items are built the model is saved and published.
    class MigrateHtml
      include ActiveModel::Model

      # Tags accepted at the root of the fragment and appended as rich text.
      SUPPORTED_ROOT_TAGS = %w[br h4 h5 h6 hr ol p text ul].freeze
      # Tags accepted anywhere inside an appended root node.
      SUPPORTED_TRIX_TAGS = %w[h4 h5 h6 hr ol p text ul li b br em a strong span].freeze

      attr_reader :model

      # Convenience entry point: builds a migrator and runs it.
      #
      # @param model [Object] record exposing +items+, +save+, +items_attributes=+ and +publish!+
      # @param content [String] HTML fragment to import
      # @return [MigrateHtml] the migrator, see {#success?}
      def self.call(model, content)
        new.call(model, content)
      end

      # Parses +content+ and builds, saves and publishes the resulting items.
      #
      # @param model [Object] record that receives the items
      # @param content [String] HTML fragment to import
      # @return [MigrateHtml] self, with +errors+ describing any problems
      def call(model, content)
        @model = model
        @items = []
        @depth = 0
        # Reset together with the other per-run state so that a reused
        # instance never appends HTML to an item built during an earlier run.
        @last  = nil

        Nokogiri::HTML5.fragment(content).children.each { |node| import_node(node) }

        # Destroy Content items that ended up without a heading.
        # NOTE(review): destroyed items remain in @items and are therefore still
        # listed in items_attributes below -- confirm this is intended.
        @items.each do |item|
          item.destroy if item.is_a?(Katalyst::Content::Content) && item.heading.blank?
        end

        unless @model.save
          # merge! (public API) keeps the invalid-tag errors recorded above,
          # whereas copy! replaced them with the model's errors.
          errors.merge!(@model.errors)

          return self
        end

        @model.items_attributes = @items.map.with_index do |item, index|
          { id: item.id, index:, depth: item.depth }
        end

        @model.publish!

        self
      end

      # @return [Boolean] true when no errors have been recorded
      def success?
        errors.empty?
      end

      private

      # Dispatches a single root-level node to the matching builder.
      def import_node(node)
        case node.name
        when "h2"
          add_section_node(heading: node.text, heading_style: "default")
        when "h3"
          add_content_node(heading: node.text, heading_style: "default")
        when *SUPPORTED_ROOT_TAGS
          append_html(node)
        else
          errors.add(:base, "contains invalid tag #{node.name}")
        end
      end

      # Builds an item on the model, remembers it as the most recent item and
      # records it in build order. Caller-supplied attributes override defaults.
      def build(type:, **)
        @last = item = @model.items.build(
          type:,
          **defaults,
          **,
        )
        @items << item
        item
      end

      # Builds a Section at depth 0; items built afterwards use depth 1.
      def add_section_node(heading:, **)
        @depth = 0
        item = build(type: Katalyst::Content::Section, heading:, **)
        @depth = 1
        item
      end

      # Builds a Content item at the current depth.
      def add_content_node(**)
        build(type: Katalyst::Content::Content, **)
      end

      # Appends +node+'s HTML to the current Content item, recording an error
      # for every descendant whose tag is not in SUPPORTED_TRIX_TAGS.
      def append_html(node)
        content = last_content_node

        node.traverse do |n|
          errors.add(:base, "contains invalid tag #{n.name}") unless SUPPORTED_TRIX_TAGS.include?(n.name)
        end

        content.content = if content.content.present?
                            content.content.read_attribute_before_type_cast(:body) + node.to_html
                          else
                            node.to_html
                          end

        # Only derive a heading when none was given (e.g. by an h3).
        content.heading ||= heading_for(content.content)

        content
      end

      # Returns the most recently built item if it is a Content item,
      # otherwise builds a new Content item.
      def last_content_node(**)
        if @last.is_a?(Katalyst::Content::Content)
          @last
        else
          add_content_node(**)
        end
      end

      # First run of word/whitespace characters in the plain text, stripped;
      # nil when the text contains no such run.
      def heading_for(action_text)
        action_text.to_plain_text[/[\w\s]+/]&.strip
      end

      # Attributes applied to every built item unless overridden.
      def defaults
        {
          background: Katalyst::Content::Config.backgrounds.first,
          visible:    true,
          depth:      @depth,
        }
      end
    end
  end
end
94 changes: 94 additions & 0 deletions spec/actions/katalyst/content/migrate_html_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# frozen_string_literal: true

require "rails_helper"

# rubocop:disable RSpec/ExampleLength
RSpec.describe Katalyst::Content::MigrateHtml do
  # A fresh migrator per example; each example runs it against a new page.
  subject(:migration) { described_class.new }

  let(:page) { create(:page) }

  # A migrator that has not run yet carries no errors.
  it { is_expected.to be_success }

  it "can migrate plain text" do
    html = <<~HTML
      Some plain text without any tags.
    HTML

    migration.call(page, html)

    expect(page.published_items).to contain_exactly(
      have_attributes(
        heading:       "Some plain text without any tags",
        heading_style: "none",
        content:       match_html(html),
      ),
    )
  end

  it "can migrate a paragraph with inline tags" do
    html = <<~HTML
      <p>Some <strong>rich text</strong> with <em>inline tags</em>.</p>
    HTML

    migration.call(page, html)

    expect(page.published_items).to contain_exactly(
      have_attributes(
        heading:       "Some rich text with inline tags",
        heading_style: "none",
        content:       match_html(html),
      ),
    )
  end

  it "can migrate multiple block level tags" do
    html = <<~HTML
      <p>Some <strong>rich text</strong> with <em>inline tags</em>.</p>
      <p>Some <strong>more</strong> content.</p>
    HTML

    migration.call(page, html)

    expect(page.published_items).to contain_exactly(
      have_attributes(
        heading:       "Some rich text with inline tags",
        heading_style: "none",
        content:       match_html(html),
      ),
    )
  end

  it "can migrate content with titles" do
    html = <<~HTML
      <h3>This is a content title</h3>
      <h4>This is a trix title</h4>
      <p>Some <strong>more</strong> content.</p>
      <h3>This is a new content block</h3>
      <p>Some <strong>more</strong> content.</p>
    HTML
    # Each h3 starts a new content item; the markup that follows is its body.
    first_body = <<~HTML
      <h4>This is a trix title</h4>
      <p>Some <strong>more</strong> content.</p>
    HTML
    second_body = <<~HTML
      <p>Some <strong>more</strong> content.</p>
    HTML

    migration.call(page, html)

    expect(page.published_items).to contain_exactly(
      have_attributes(
        heading:       "This is a content title",
        heading_style: "default",
        content:       match_html(first_body),
      ),
      have_attributes(
        heading: "This is a new content block",
        content: match_html(second_body),
      ),
    )
  end

  it "can migrate content with section" do
    html = <<~HTML
      <h2>This is a section title</h2>
      <p>Some content.</p>
    HTML
    # Content following an h2 is expected one level below the section.
    nested_body = <<~HTML
      <p>Some content.</p>
    HTML

    migration.call(page, html)

    expect(page.published_items).to contain_exactly(
      have_attributes(
        type:          "Katalyst::Content::Section",
        heading:       "This is a section title",
        heading_style: "default",
        depth:         0,
      ),
      have_attributes(
        heading_style: "none",
        content:       match_html(nested_body),
        depth:         1,
      ),
    )
  end
end
# rubocop:enable RSpec/ExampleLength
76 changes: 76 additions & 0 deletions spec/support/match_html.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# frozen_string_literal: true

require "compare-xml"
require "nokogiri"
require "rspec/matchers"

# RSpec matcher that compares HTML structurally (via CompareXML) instead of
# by string equality. Accepts a Nokogiri node, an ActionText::RichText or
# anything Nokogiri can parse as an HTML fragment.
class HTMLMatcher < RSpec::Matchers::BuiltIn::BaseMatcher
  # @param expected_html [String] HTML the actual value is compared against
  # @param debug [Boolean] when true, print the first difference on mismatch
  # @param options [Hash] extra CompareXML options, merged over the defaults
  def initialize(expected_html, debug: true, **options)
    super()

    # Options documented here: https://github.com/vkononov/compare-xml
    default_options = {
      collapse_whitespace: true,
      ignore_attr_order:   true,
      ignore_comments:     true,
    }

    # verbose is forced on: describe_diff relies on the detailed diff entries.
    @options = default_options.merge(options).merge(verbose: true)

    @actual = nil
    @expected_html = expected_html
    @expected_doc = Nokogiri::HTML5.fragment(expected_html)
    @debug = debug
  end

  # @param [Object] response object to match against
  # @return [Boolean] `true` if response matches the expected html
  def matches?(response)
    # Drop any diff memoised by an earlier call: composed matchers such as
    # contain_exactly may try one matcher instance against several values.
    @diff = nil

    case response
    when Nokogiri::XML::Node
      @actual_doc = response
      @actual_html = response.to_html
    when ActionText::RichText
      @actual_html = response.read_attribute_before_type_cast(:body)
      @actual_doc = response.body.fragment.source
    else
      @actual_html = response
      # Parse with the same HTML5 parser used for the expected document so
      # identical markup always yields identical trees.
      @actual_doc = Nokogiri::HTML5.fragment(response)
    end

    describe_diff if @debug && !equivalent?

    equivalent?
  end

  # @return [String] description of this matcher
  def description
    "match HTML against #{@expected_html}"
  end

  # @return [String] message shown when the match fails
  def failure_message
    "expected '#{@expected_html}' but it was '#{@actual_html}'"
  end

  # @return [Boolean] true when CompareXML reports no differences
  def equivalent?
    diff.empty?
  end

  # @return [Array] differences reported by CompareXML for the current
  #   comparison, memoised until the next call to matches?
  def diff
    @diff ||= CompareXML.equivalent?(@expected_doc, @actual_doc, **@options)
  end

  # Prints the first reported difference to stdout.
  def describe_diff
    first_difference = diff.first
    expected, actual = [first_difference[:diff1], first_difference[:diff2]].map do |m|
      m.is_a?(String) ? m : m.to_html
    end
    puts "Diff: #{expected} != #{actual}"
  end
end

module RSpec
  module Matchers
    # Builds an HTMLMatcher for use in expectations.
    #
    # @param expected_html [String] HTML the actual value should match
    # @param options [Hash] keyword options passed through to HTMLMatcher.new
    # @return [HTMLMatcher]
    def match_html(expected_html, **options) = HTMLMatcher.new(expected_html, **options)
  end
end

0 comments on commit a41272f

Please sign in to comment.