From 9128991d7b00bfd9dc54b6b389a964be1981f0c5 Mon Sep 17 00:00:00 2001
From: mwlang
Date: Wed, 23 Mar 2016 21:37:03 -0700
Subject: [PATCH 1/2] implemented parser for Ox

---
 lib/nori.rb                       |  2 +-
 lib/nori/parser/ox.rb             | 87 +++++++++++++++++++++++++++++++
 nori.gemspec                      |  3 +-
 spec/nori/api_spec.rb             |  2 +-
 spec/nori/core_ext/object_spec.rb |  4 +-
 5 files changed, 93 insertions(+), 5 deletions(-)
 create mode 100644 lib/nori/parser/ox.rb

diff --git a/lib/nori.rb b/lib/nori.rb
index ba55215..c86bd61 100644
--- a/lib/nori.rb
+++ b/lib/nori.rb
@@ -11,7 +11,7 @@ def self.hash_key(name, options = {})
     name
   end
 
-  PARSERS = { :rexml => "REXML", :nokogiri => "Nokogiri" }
+  PARSERS = { :rexml => "REXML", :nokogiri => "Nokogiri", :ox => "Ox" }
 
   def initialize(options = {})
     defaults = {
diff --git a/lib/nori/parser/ox.rb b/lib/nori/parser/ox.rb
new file mode 100644
index 0000000..7bd89b5
--- /dev/null
+++ b/lib/nori/parser/ox.rb
@@ -0,0 +1,87 @@
+require "ox"
+
+class Nori
+  module Parser
+
+    # = Nori::Parser::Ox
+    #
+    # Ox SAX parser.
+    module Ox
+
+      # SAX handler that builds a tree of Nori::XMLUtilityNode objects
+      # from the callbacks fired by ::Ox.sax_parse.
+      class Document < ::Ox::Sax
+        attr_accessor :options
+        attr_accessor :element_name
+
+        # Nodes of the elements being built, innermost last.
+        def stack
+          @stack ||= []
+        end
+
+        # Attributes collected so far for the element being opened.
+        def attr_stack
+          @attr_stack ||= {}
+        end
+
+        # Remembers the element name and resets the collected attributes;
+        # the node itself is created in #attrs_done.
+        def start_element(name, attrs = [])
+          @element_name = name.to_s
+          attr_stack.clear
+        end
+
+        # To keep backward behaviour compatibility
+        # delete last child if it is a space-only text node
+        def end_element(name)
+          if stack.size > 1
+            last = stack.pop
+            maybe_string = last.children.last
+            if maybe_string.is_a?(String) && maybe_string.strip.empty?
+              last.children.pop
+            end
+            stack.last.add_node last
+          end
+        end
+
+        # If this node is a successive character then add it as is.
+        # First child being a space-only text node will not be added
+        # because there are no previous characters.
+        # Text seen while the stack is empty is ignored.
+        def characters(string)
+          last = stack.last
+          if last && (last.children.last.is_a?(String) || string.strip.size > 0)
+            last.add_node(string)
+          end
+        end
+
+        alias text characters
+        alias cdata characters
+
+        # Records one attribute of the element being opened.
+        def attr(name, str)
+          attr_stack[name.to_s] = str
+        end
+
+        # Pushes the node for the element being opened once all of its
+        # attributes are known. attr_stack is copied because it is
+        # cleared and reused for the next element.
+        def attrs_done
+          return if element_name.nil?
+          stack.push Nori::XMLUtilityNode.new(options, element_name, attr_stack.dup)
+        end
+
+      end
+
+      # Parses +xml+ with the given +options+ and returns the resulting
+      # Hash ({} when no node was built).
+      def self.parse(xml, options)
+        document = Document.new
+        document.options = options
+        ::Ox.sax_parse document, xml
+        document.stack.length > 0 ? document.stack.pop.to_hash : {}
+      end
+
+    end
+  end
+end
diff --git a/nori.gemspec b/nori.gemspec
index 5195417..7d56440 100644
--- a/nori.gemspec
+++ b/nori.gemspec
@@ -18,7 +18,8 @@ Gem::Specification.new do |s|
   s.add_development_dependency "rake", "~> 10.0"
   s.add_development_dependency "nokogiri", ">= 1.4.0"
   s.add_development_dependency "rspec", "~> 2.12"
-
+  s.add_development_dependency "ox", "~> 2.3"
+
   s.files = `git ls-files`.split("\n")
   s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
   s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
diff --git a/spec/nori/api_spec.rb b/spec/nori/api_spec.rb
index 9a4b98c..7333628 100644
--- a/spec/nori/api_spec.rb
+++ b/spec/nori/api_spec.rb
@@ -4,7 +4,7 @@
 
   describe "PARSERS" do
     it "should return a Hash of parser details" do
-      expect(Nori::PARSERS).to eq({ :rexml => "REXML", :nokogiri => "Nokogiri" })
+      expect(Nori::PARSERS).to eq({ :rexml => "REXML", :nokogiri => "Nokogiri", :ox => "Ox" })
     end
   end
 
diff --git a/spec/nori/core_ext/object_spec.rb b/spec/nori/core_ext/object_spec.rb
index 73e50ec..ba8ae45 100644
--- a/spec/nori/core_ext/object_spec.rb
+++ b/spec/nori/core_ext/object_spec.rb
@@ -5,13 +5,13 @@
   describe "#blank?" do
     [nil, false, [], {}].each do |object|
       it "should return true for: #{object.inspect}" do
-        expect(object.blank?).to be_true
+        expect(object.blank?).to be_truthy
       end
     end
 
     [true, [nil], 1, "string", { :key => "value" }].each do |object|
       it "should return false for: #{object.inspect}" do
-        expect(object.blank?).to be_false
+        expect(object.blank?).to be_falsey
       end
     end
   end

From 45be3dd731ddb79f7021eb0bc537dc6437d7cc29 Mon Sep 17 00:00:00 2001
From: mwlang
Date: Wed, 23 Mar 2016 21:39:06 -0700
Subject: [PATCH 2/2] updated README

---
 README.md | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 48a2b61..e673612 100644
--- a/README.md
+++ b/README.md
@@ -17,14 +17,20 @@ parser.parse("This is the contents")
 # => { 'tag' => 'This is the contents' }
 ```
 
-Nori supports pluggable parsers and ships with both REXML and Nokogiri implementations.
+Nori supports pluggable parsers and ships with REXML, Nokogiri, and Ox implementations.
 It defaults to Nokogiri since v2.0.0, but you can change it to use REXML via:
 
 ``` ruby
 Nori.new(:parser => :rexml)  # or :nokogiri
 ```
 
-Make sure Nokogiri is in your LOAD_PATH when parsing XML, because Nori tries to load it
+or to Ox via:
+
+``` ruby
+Nori.new(:parser => :ox)
+```
+
+Make sure Nokogiri or Ox is in your LOAD_PATH when parsing XML, because Nori tries to load it
 when it's needed.
 
 