diff --git a/lib/lutaml/model/json_adapter/json_document.rb b/lib/lutaml/model/json_adapter/json_document.rb index a3cf266..124fc49 100644 --- a/lib/lutaml/model/json_adapter/json_document.rb +++ b/lib/lutaml/model/json_adapter/json_document.rb @@ -7,7 +7,7 @@ module Model module JsonAdapter # Base class for JSON documents class JsonDocument < JsonObject - def self.parse(json) + def self.parse(json, _options = {}) raise NotImplementedError, "Subclasses must implement `parse`." end diff --git a/lib/lutaml/model/json_adapter/multi_json_adapter.rb b/lib/lutaml/model/json_adapter/multi_json_adapter.rb index 791e8bd..4f18ef7 100644 --- a/lib/lutaml/model/json_adapter/multi_json_adapter.rb +++ b/lib/lutaml/model/json_adapter/multi_json_adapter.rb @@ -5,7 +5,7 @@ module Lutaml module Model module JsonAdapter class MultiJsonAdapter < JsonDocument - def self.parse(json) + def self.parse(json, _options = {}) data = MultiJson.load(json) new(data) end diff --git a/lib/lutaml/model/json_adapter/standard_json_adapter.rb b/lib/lutaml/model/json_adapter/standard_json_adapter.rb index 0ede855..4dbbd78 100644 --- a/lib/lutaml/model/json_adapter/standard_json_adapter.rb +++ b/lib/lutaml/model/json_adapter/standard_json_adapter.rb @@ -5,7 +5,7 @@ module Lutaml module Model module JsonAdapter class StandardJsonAdapter < JsonDocument - def self.parse(json) + def self.parse(json, _options = {}) JSON.parse(json, create_additions: false) end diff --git a/lib/lutaml/model/serialize.rb b/lib/lutaml/model/serialize.rb index fa1e516..3b379dc 100644 --- a/lib/lutaml/model/serialize.rb +++ b/lib/lutaml/model/serialize.rb @@ -51,6 +51,7 @@ def add_custom_handling_methods_to_model(klass) Utils.add_boolean_accessor_if_not_defined(klass, :ordered) Utils.add_boolean_accessor_if_not_defined(klass, :mixed) Utils.add_accessor_if_not_defined(klass, :element_order) + Utils.add_accessor_if_not_defined(klass, :encoding) Utils.add_method_if_not_defined(klass, :using_default_for) do |attribute_name| @@ -101,23 +102,22 @@ def attribute(name, type, options = {}) end end - define_method(:"from_#{format}") do |data| + define_method(:"from_#{format}") do |data, options = {}| adapter = Lutaml::Model::Config.send(:"#{format}_adapter") - doc = adapter.parse(data) - public_send(:"of_#{format}", doc) + doc = adapter.parse(data, options) + public_send(:"of_#{format}", doc, options) end - define_method(:"of_#{format}") do |doc| + define_method(:"of_#{format}") do |doc, options = {}| if doc.is_a?(Array) - return doc.map do |item| - send(:"of_#{format}", item) - end + return doc.map { |item| send(:"of_#{format}", item) } end if format == :xml doc_hash = doc.parse_element(doc.root, self, :xml) - apply_mappings(doc_hash, format) + options[:encoding] = doc.encoding + apply_mappings(doc_hash, format, options) else apply_mappings(doc.to_h, format) end @@ -315,6 +315,7 @@ def apply_mappings(doc, format, options = {}) end def apply_xml_mapping(doc, instance, options = {}) + instance.encoding = options[:encoding] return instance unless doc if options[:default_namespace].nil? @@ -459,7 +460,7 @@ def ensure_utf8(value) end end - attr_accessor :element_order, :schema_location + attr_accessor :element_order, :schema_location, :encoding attr_writer :ordered, :mixed def initialize(attrs = {}) @@ -548,6 +549,7 @@ def key_value(hash, key) options) end + options[:parse_encoding] = encoding if encoding adapter.new(representation).public_send(:"to_#{format}", options) end end diff --git a/lib/lutaml/model/toml_adapter/toml_document.rb b/lib/lutaml/model/toml_adapter/toml_document.rb index 6c69331..f57e15c 100644 --- a/lib/lutaml/model/toml_adapter/toml_document.rb +++ b/lib/lutaml/model/toml_adapter/toml_document.rb @@ -7,7 +7,7 @@ module Model module TomlAdapter # Base class for TOML documents class TomlDocument < TomlObject - def self.parse(toml) + def self.parse(toml, _options = {}) raise NotImplementedError, "Subclasses must implement `parse`." end diff --git a/lib/lutaml/model/toml_adapter/toml_rb_adapter.rb b/lib/lutaml/model/toml_adapter/toml_rb_adapter.rb index c1f65cf..7f0e7a1 100644 --- a/lib/lutaml/model/toml_adapter/toml_rb_adapter.rb +++ b/lib/lutaml/model/toml_adapter/toml_rb_adapter.rb @@ -5,7 +5,7 @@ module Lutaml module Model module TomlAdapter class TomlRbAdapter < TomlDocument - def self.parse(toml) + def self.parse(toml, _options = {}) data = TomlRB.parse(toml) new(data) end diff --git a/lib/lutaml/model/toml_adapter/tomlib_adapter.rb b/lib/lutaml/model/toml_adapter/tomlib_adapter.rb index d0e6e1c..be42dce 100644 --- a/lib/lutaml/model/toml_adapter/tomlib_adapter.rb +++ b/lib/lutaml/model/toml_adapter/tomlib_adapter.rb @@ -5,7 +5,7 @@ module Lutaml module Model module TomlAdapter class TomlibAdapter < TomlDocument - def self.parse(toml) + def self.parse(toml, _options = {}) data = Tomlib.load(toml) new(data) end diff --git a/lib/lutaml/model/xml_adapter/nokogiri_adapter.rb b/lib/lutaml/model/xml_adapter/nokogiri_adapter.rb index ba6f5e1..412ec0e 100644 --- a/lib/lutaml/model/xml_adapter/nokogiri_adapter.rb +++ b/lib/lutaml/model/xml_adapter/nokogiri_adapter.rb @@ -6,10 +6,10 @@ module Lutaml module Model module XmlAdapter class NokogiriAdapter < XmlDocument - def self.parse(xml) - parsed = Nokogiri::XML(xml) + def self.parse(xml, options = {}) + parsed = Nokogiri::XML(xml, nil, options[:encoding]) root = NokogiriElement.new(parsed.root) - new(root) + new(root, parsed.encoding) end def to_xml(options = {}) @@ -17,6 +17,8 @@ def to_xml(options = {}) if options.key?(:encoding) builder_options[:encoding] = options[:encoding] unless options[:encoding].nil? + elsif options.key?(:parse_encoding) + builder_options[:encoding] = options[:parse_encoding] else builder_options[:encoding] = "UTF-8" end diff --git a/lib/lutaml/model/xml_adapter/oga_adapter.rb b/lib/lutaml/model/xml_adapter/oga_adapter.rb index c316854..528a8fe 100644 --- a/lib/lutaml/model/xml_adapter/oga_adapter.rb +++ b/lib/lutaml/model/xml_adapter/oga_adapter.rb @@ -5,7 +5,7 @@ module Lutaml module Model module XmlAdapter class OgaAdapter < XmlDocument - def self.parse(xml) + def self.parse(xml, _options = {}) parsed = Oga.parse_xml(xml) root = OgaElement.new(parsed) new(root) diff --git a/lib/lutaml/model/xml_adapter/ox_adapter.rb b/lib/lutaml/model/xml_adapter/ox_adapter.rb index 3609f50..70032f3 100644 --- a/lib/lutaml/model/xml_adapter/ox_adapter.rb +++ b/lib/lutaml/model/xml_adapter/ox_adapter.rb @@ -6,23 +6,28 @@ module Lutaml module Model module XmlAdapter class OxAdapter < XmlDocument - def self.parse(xml) + def self.parse(xml, options = {}) + Ox.default_options = Ox.default_options.merge(encoding: options[:encoding] || "UTF-8") + parsed = Ox.parse(xml) root = OxElement.new(parsed) - new(root) + new(root, Ox.default_options[:encoding]) end def to_xml(options = {}) - builder = Builder::Ox.build builder_options = { version: options[:version] } - if options.key?(:encoding) - builder_options[:encoding] = options[:encoding] unless options[:encoding].nil? - else - builder_options[:encoding] = "UTF-8" - end + builder_options[:encoding] = if options.key?(:encoding) + options[:encoding] + elsif options.key?(:parse_encoding) + options[:parse_encoding] + else + "UTF-8" + end + + builder = Builder::Ox.build + builder.xml.instruct(:xml, encoding: options[:parse_encoding]) - builder.xml.instruct(:xml, builder_options) if @root.is_a?(Lutaml::Model::XmlAdapter::OxElement) @root.build_xml(builder) elsif ordered?(@root, options) @@ -34,7 +39,12 @@ def to_xml(options = {}) end xml_data = builder.xml.to_s - options[:declaration] ? xml_data : xml_data.sub(/\A<\?xml[^>]*\?>\n?/, "") + if builder_options[:encoding] && xml_data.valid_encoding? + xml_data = xml_data.encode(builder_options[:encoding]) + end + + stripped_data = xml_data.lines.drop(1).join + options[:declaration] ? declaration(options) + stripped_data : stripped_data end private diff --git a/lib/lutaml/model/xml_adapter/xml_document.rb b/lib/lutaml/model/xml_adapter/xml_document.rb index 9a8052a..d7325a1 100644 --- a/lib/lutaml/model/xml_adapter/xml_document.rb +++ b/lib/lutaml/model/xml_adapter/xml_document.rb @@ -7,13 +7,14 @@ module Lutaml module Model module XmlAdapter class XmlDocument - attr_reader :root + attr_reader :root, :encoding - def initialize(root) + def initialize(root, encoding = nil) @root = root + @encoding = encoding end - def self.parse(xml) + def self.parse(xml, _options = {}) raise NotImplementedError, "Subclasses must implement `parse`." end diff --git a/lib/lutaml/model/yaml_adapter/standard_yaml_adapter.rb b/lib/lutaml/model/yaml_adapter/standard_yaml_adapter.rb index 7847e3f..7ff3625 100644 --- a/lib/lutaml/model/yaml_adapter/standard_yaml_adapter.rb +++ b/lib/lutaml/model/yaml_adapter/standard_yaml_adapter.rb @@ -14,7 +14,7 @@ class StandardYamlAdapter < YamlDocument PERMITTED_CLASSES_BASE end.freeze - def self.parse(yaml) + def self.parse(yaml, _options = {}) YAML.safe_load(yaml, permitted_classes: PERMITTED_CLASSES) end diff --git a/lib/lutaml/model/yaml_adapter/yaml_document.rb b/lib/lutaml/model/yaml_adapter/yaml_document.rb index 62ae81b..217ffa0 100644 --- a/lib/lutaml/model/yaml_adapter/yaml_document.rb +++ b/lib/lutaml/model/yaml_adapter/yaml_document.rb @@ -7,7 +7,7 @@ module Model module YamlAdapter # Base class for YAML documents class YamlDocument < YamlObject - def self.parse(yaml) + def self.parse(yaml, _options = {}) raise NotImplementedError, "Subclasses must implement `parse`." end diff --git a/spec/fixtures/xml/latin_encoding.xml b/spec/fixtures/xml/latin_encoding.xml new file mode 100644 index 0000000..347431d --- /dev/null +++ b/spec/fixtures/xml/latin_encoding.xml @@ -0,0 +1,5 @@ + + Jos + Mller + Reminder + diff --git a/spec/fixtures/xml/shift_jis.xml b/spec/fixtures/xml/shift_jis.xml new file mode 100644 index 0000000..52ec13d --- /dev/null +++ b/spec/fixtures/xml/shift_jis.xml @@ -0,0 +1,4 @@ + + 菑pP + 123456 + diff --git a/spec/lutaml/model/mixed_content_spec.rb b/spec/lutaml/model/mixed_content_spec.rb index 4b66c59..9456783 100644 --- a/spec/lutaml/model/mixed_content_spec.rb +++ b/spec/lutaml/model/mixed_content_spec.rb @@ -72,6 +72,28 @@ class Article < Lutaml::Model::Serializable end end + class Latin < Lutaml::Model::Serializable + attribute :the, :string + attribute :from, :string + attribute :heading, :string + + xml do + root "note" + map_element "to", to: :the + map_element "from", to: :from + map_element "heading", to: :heading + end + end + + class Shift < Lutaml::Model::Serializable + attribute :field, :string, collection: true + + xml do + root "root" + map_element "FieldName", to: :field + end + end + class SpecialCharContentWithMixedTrue < Lutaml::Model::Serializable attribute :content, :string @@ -651,24 +673,185 @@ class HexCode < Lutaml::Model::Serializable context "when encoding: nil xml" do let(:expected_encoding_nil_nokogiri_xml) { "∑computer security∏ type of ​ operation specified µ by an access right" } - let(:expected_encoding_nil_ox_xml) { "\xE2\x88\x91computer security\xE2\x88\x8F type of \xE2\x80\x8B operation specified \xC2\xB5 by an access right" } + let(:expected_encoding_nil_ox_xml) { "∑computer security∏ type of ​ operation specified µ by an access right" } it "serializes special char mixed content correctly with encoding: nil to get hexcode" do parsed = MixedContentSpec::HexCode.from_xml(xml) serialized = parsed.to_xml(encoding: nil) - if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter - expected_output = expected_encoding_nil_ox_xml - expected_output.force_encoding("ASCII-8BIT") - else - expected_output = expected_encoding_nil_nokogiri_xml - end + expected_output = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter + expected_encoding_nil_ox_xml + else + expected_encoding_nil_nokogiri_xml + end expect(serialized.strip).to include(expected_output) end end end end + + context "when use encoding in parsing" do + context "when use SHIFT-JIS encoding" do + let(:fixture) { File.read(fixture_path("xml/shift_jis.xml"), encoding: "Shift_JIS") } + + describe ".from_xml" do + it "verifies the encoding of file read" do + expect(fixture.encoding.to_s).to eq("Shift_JIS") + end + + it "deserializes SHIFT encoded content correctly with explicit encoding option" do + parsed = MixedContentSpec::Shift.from_xml(fixture, encoding: "Shift_JIS") + + expected_content = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter + "\x8E\xE8\x8F\x91\x82\xAB\x89p\x8E\x9A\x82P".force_encoding("Shift_JIS") + else + "手書き英字1" + end + + expect(parsed.field).to include(expected_content) + end + + it "deserializes SHIFT encoded content incorrectly without explicit encoding option" do + parsed = MixedContentSpec::Shift.from_xml(fixture) + + expected_content = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter + "\x8E\xE8\x8F\x91\x82\xAB\x89p\x8E\x9A\x82P".force_encoding("UTF-8") + else + "�菑���p���P" + end + + expect(parsed.field).to include(expected_content) + end + end + + describe ".to_xml" do + it "serializes SHIFT-JIS encoding content correctly reading from file" do + parsed = MixedContentSpec::Shift.from_xml(fixture, encoding: "Shift_JIS") + serialized = parsed.to_xml + + expect(serialized.strip).to eq(fixture.strip) + end + + it "serializes SHIFT encoded content correctly with explicit encoding option both in parsing and deserializing" do + parsed = MixedContentSpec::Shift.from_xml(fixture, encoding: "Shift_JIS") + serialized = parsed.to_xml(encoding: "UTF-8") + + expected_xml = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter + "\x8E\xE8\x8F\x91\x82\xAB\x89p\x8E\x9A\x82P".force_encoding("Shift_JIS") + else + "手書き英字1" + end + + expect(parsed.field).to include(expected_xml) + expect(parsed.encoding).to eq("Shift_JIS") + + expect(serialized).to include("手書き英字1") + expect(serialized.encoding.to_s).to eq("UTF-8") + end + + it "serializes SHIFT encoded content correctly with explicit encoding option" do + parsed = MixedContentSpec::Shift.from_xml(fixture, encoding: "Shift_JIS") + serialized = parsed.to_xml(encoding: "Shift_JIS") + + expected_xml = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter + "\x8E\xE8\x8F\x91\x82\xAB\x89p\x8E\x9A\x82P".force_encoding("Shift_JIS") + else + "手書き英字1" + end + + expect(parsed.field).to include(expected_xml) + expect(parsed.encoding).to eq("Shift_JIS") + + expect(serialized).to include("\x8E\xE8\x8F\x91\x82\xAB\x89p\x8E\x9A\x82P".force_encoding("Shift_JIS")) + expect(serialized.encoding.to_s).to eq("Shift_JIS") + end + + it "serializes SHIFT encoded content correctly with declaration: true" do + parsed = MixedContentSpec::Shift.from_xml(fixture, encoding: "Shift_JIS") + serialized = parsed.to_xml(declaration: true, encoding: "Shift_JIS") + + expected_xml = "\n\n \x8E\xE8\x8F\x91\x82\xAB\x89p\x8E\x9A\x82P\n 123456\n" + + expect(serialized).to be_equivalent_to(expected_xml) + expect(serialized.encoding.to_s).to eq("Shift_JIS") + end + + it "serializes SHIFT-JIS content incorrectly bcz no encoding provided during parsing" do + parsed = MixedContentSpec::Shift.from_xml(fixture) + serialized = parsed.to_xml(encoding: "Shift_JIS") + + expected_content = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter + "\n \x8E菑\x82\xAB\x89p\x8E\x9A\x82P\n 123456\n\n" + else + "\n �菑���p���P\n 123456\n" + end + + expect(serialized).to eq(expected_content) + end + + it "serializes SHIFT-JIS encoding content correctly reading from string" do + xml = "手書き英字1123456".encode("Shift_JIS") + parsed = MixedContentSpec::Shift.from_xml(xml, encoding: "Shift_JIS") + serialized = parsed.to_xml(encoding: "Shift_JIS") + + expect(serialized).to be_equivalent_to(xml) + end + + it "serializes SHIFT-JIS encoding content correctly" do + parsed = MixedContentSpec::Shift.from_xml(fixture, encoding: "Shift_JIS") + serialized = parsed.to_xml(encoding: "Shift_JIS") + + expect(serialized).to be_equivalent_to(fixture) + end + end + end + + context "when use LATIN (ISO-8859-1) encoding" do + let(:fixture) { File.read(fixture_path("xml/latin_encoding.xml"), encoding: "ISO-8859-1") } + + describe ".from_xml" do + it "verifies the encoding of file read" do + expect(fixture.encoding.to_s).to eq("ISO-8859-1") + end + + it "deserializes latin encoded content correctly" do + parsed = MixedContentSpec::Latin.from_xml(fixture, encoding: "ISO-8859-1") + + expected_content = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter + ["M\xFCller".force_encoding("ISO-8859-1"), "Jos\xE9".force_encoding("ISO-8859-1")] + else + ["Müller", "José"] + end + + expect(parsed.from).to eq(expected_content[0]) + expect(parsed.the).to eq(expected_content[1]) + end + + it "deserializes latin encoded content incorrectly" do + parsed = MixedContentSpec::Latin.from_xml(fixture) + + expected_content = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter + ["M\xFCller", "Jos\xE9"] + else + ["M�ller", "Jos�"] + end + + expect(parsed.from).to eq(expected_content[0]) + expect(parsed.the).to eq(expected_content[1]) + end + end + + describe ".to_xml" do + it "serializes latin encoded content correctly" do + parsed = MixedContentSpec::Latin.from_xml(fixture, encoding: "ISO-8859-1") + serialized = parsed.to_xml + + expect(serialized.strip).to eq("\n Jos\xE9\n M\xFCller\n Reminder\n".force_encoding("ISO-8859-1")) + end + end + end + end end describe Lutaml::Model::XmlAdapter::NokogiriAdapter do