diff --git a/lib/lutaml/model/json_adapter/json_document.rb b/lib/lutaml/model/json_adapter/json_document.rb
index a3cf266..124fc49 100644
--- a/lib/lutaml/model/json_adapter/json_document.rb
+++ b/lib/lutaml/model/json_adapter/json_document.rb
@@ -7,7 +7,7 @@ module Model
module JsonAdapter
# Base class for JSON documents
class JsonDocument < JsonObject
- def self.parse(json)
+ def self.parse(json, _options = {}) # abstract hook; _options is accepted because callers invoke adapter.parse(data, options), unused here
raise NotImplementedError, "Subclasses must implement `parse`."
end
diff --git a/lib/lutaml/model/json_adapter/multi_json_adapter.rb b/lib/lutaml/model/json_adapter/multi_json_adapter.rb
index 791e8bd..4f18ef7 100644
--- a/lib/lutaml/model/json_adapter/multi_json_adapter.rb
+++ b/lib/lutaml/model/json_adapter/multi_json_adapter.rb
@@ -5,7 +5,7 @@ module Lutaml
module Model
module JsonAdapter
class MultiJsonAdapter < JsonDocument
- def self.parse(json)
+ def self.parse(json, _options = {}) # _options is accepted for the shared adapter.parse(data, options) call and ignored here
data = MultiJson.load(json)
new(data)
end
diff --git a/lib/lutaml/model/json_adapter/standard_json_adapter.rb b/lib/lutaml/model/json_adapter/standard_json_adapter.rb
index 0ede855..4dbbd78 100644
--- a/lib/lutaml/model/json_adapter/standard_json_adapter.rb
+++ b/lib/lutaml/model/json_adapter/standard_json_adapter.rb
@@ -5,7 +5,7 @@ module Lutaml
module Model
module JsonAdapter
class StandardJsonAdapter < JsonDocument
- def self.parse(json)
+ def self.parse(json, _options = {}) # _options is accepted for the shared adapter.parse(data, options) call and ignored here
JSON.parse(json, create_additions: false)
end
diff --git a/lib/lutaml/model/serialize.rb b/lib/lutaml/model/serialize.rb
index fa1e516..3b379dc 100644
--- a/lib/lutaml/model/serialize.rb
+++ b/lib/lutaml/model/serialize.rb
@@ -51,6 +51,7 @@ def add_custom_handling_methods_to_model(klass)
Utils.add_boolean_accessor_if_not_defined(klass, :ordered)
Utils.add_boolean_accessor_if_not_defined(klass, :mixed)
Utils.add_accessor_if_not_defined(klass, :element_order)
+ Utils.add_accessor_if_not_defined(klass, :encoding)
Utils.add_method_if_not_defined(klass,
:using_default_for) do |attribute_name|
@@ -101,23 +102,22 @@ def attribute(name, type, options = {})
end
end
- define_method(:"from_#{format}") do |data|
+ define_method(:"from_#{format}") do |data, options = {}| # options are handed to both adapter.parse and of_<format>
adapter = Lutaml::Model::Config.send(:"#{format}_adapter")
- doc = adapter.parse(data)
- public_send(:"of_#{format}", doc)
+ doc = adapter.parse(data, options)
+ public_send(:"of_#{format}", doc, options)
end
- define_method(:"of_#{format}") do |doc|
+ define_method(:"of_#{format}") do |doc, options = {}| # doc: result of adapter.parse, or an Array of such documents
if doc.is_a?(Array)
- return doc.map do |item|
- send(:"of_#{format}", item)
- end
+ return doc.map { |item| send(:"of_#{format}", item, options) }
end
if format == :xml
doc_hash = doc.parse_element(doc.root, self, :xml)
- apply_mappings(doc_hash, format)
+ # Merge into a copy: assigning options[:encoding] in place would leak this document's encoding into the caller's hash.
+ apply_mappings(doc_hash, format, options.merge(encoding: doc.encoding))
else
apply_mappings(doc.to_h, format)
end
@@ -315,6 +315,7 @@ def apply_mappings(doc, format, options = {})
end
def apply_xml_mapping(doc, instance, options = {})
+ instance.encoding = options[:encoding]
return instance unless doc
if options[:default_namespace].nil?
@@ -459,7 +460,7 @@ def ensure_utf8(value)
end
end
- attr_accessor :element_order, :schema_location
+ attr_accessor :element_order, :schema_location, :encoding
attr_writer :ordered, :mixed
def initialize(attrs = {})
@@ -548,6 +549,7 @@ def key_value(hash, key)
options)
end
+ options[:parse_encoding] = encoding if encoding
adapter.new(representation).public_send(:"to_#{format}", options)
end
end
diff --git a/lib/lutaml/model/toml_adapter/toml_document.rb b/lib/lutaml/model/toml_adapter/toml_document.rb
index 6c69331..f57e15c 100644
--- a/lib/lutaml/model/toml_adapter/toml_document.rb
+++ b/lib/lutaml/model/toml_adapter/toml_document.rb
@@ -7,7 +7,7 @@ module Model
module TomlAdapter
# Base class for TOML documents
class TomlDocument < TomlObject
- def self.parse(toml)
+ def self.parse(toml, _options = {}) # abstract hook; _options is accepted because callers invoke adapter.parse(data, options), unused here
raise NotImplementedError, "Subclasses must implement `parse`."
end
diff --git a/lib/lutaml/model/toml_adapter/toml_rb_adapter.rb b/lib/lutaml/model/toml_adapter/toml_rb_adapter.rb
index c1f65cf..7f0e7a1 100644
--- a/lib/lutaml/model/toml_adapter/toml_rb_adapter.rb
+++ b/lib/lutaml/model/toml_adapter/toml_rb_adapter.rb
@@ -5,7 +5,7 @@ module Lutaml
module Model
module TomlAdapter
class TomlRbAdapter < TomlDocument
- def self.parse(toml)
+ def self.parse(toml, _options = {}) # _options is accepted for the shared adapter.parse(data, options) call and ignored here
data = TomlRB.parse(toml)
new(data)
end
diff --git a/lib/lutaml/model/toml_adapter/tomlib_adapter.rb b/lib/lutaml/model/toml_adapter/tomlib_adapter.rb
index d0e6e1c..be42dce 100644
--- a/lib/lutaml/model/toml_adapter/tomlib_adapter.rb
+++ b/lib/lutaml/model/toml_adapter/tomlib_adapter.rb
@@ -5,7 +5,7 @@ module Lutaml
module Model
module TomlAdapter
class TomlibAdapter < TomlDocument
- def self.parse(toml)
+ def self.parse(toml, _options = {}) # _options is accepted for the shared adapter.parse(data, options) call and ignored here
data = Tomlib.load(toml)
new(data)
end
diff --git a/lib/lutaml/model/xml_adapter/nokogiri_adapter.rb b/lib/lutaml/model/xml_adapter/nokogiri_adapter.rb
index ba6f5e1..412ec0e 100644
--- a/lib/lutaml/model/xml_adapter/nokogiri_adapter.rb
+++ b/lib/lutaml/model/xml_adapter/nokogiri_adapter.rb
@@ -6,10 +6,10 @@ module Lutaml
module Model
module XmlAdapter
class NokogiriAdapter < XmlDocument
- def self.parse(xml)
- parsed = Nokogiri::XML(xml)
+ def self.parse(xml, options = {}) # options[:encoding] is forwarded to Nokogiri (nil when the caller gave none)
+ parsed = Nokogiri::XML(xml, nil, options[:encoding]) # the nil second argument is Nokogiri's url parameter
root = NokogiriElement.new(parsed.root)
- new(root)
+ new(root, parsed.encoding) # keep the encoding reported by the parsed document on the adapter document
end
def to_xml(options = {})
@@ -17,6 +17,8 @@ def to_xml(options = {})
if options.key?(:encoding)
builder_options[:encoding] = options[:encoding] unless options[:encoding].nil?
+ elsif options.key?(:parse_encoding)
+ builder_options[:encoding] = options[:parse_encoding]
else
builder_options[:encoding] = "UTF-8"
end
diff --git a/lib/lutaml/model/xml_adapter/oga_adapter.rb b/lib/lutaml/model/xml_adapter/oga_adapter.rb
index c316854..528a8fe 100644
--- a/lib/lutaml/model/xml_adapter/oga_adapter.rb
+++ b/lib/lutaml/model/xml_adapter/oga_adapter.rb
@@ -5,7 +5,7 @@ module Lutaml
module Model
module XmlAdapter
class OgaAdapter < XmlDocument
- def self.parse(xml)
+ def self.parse(xml, _options = {})
parsed = Oga.parse_xml(xml)
root = OgaElement.new(parsed)
new(root)
diff --git a/lib/lutaml/model/xml_adapter/ox_adapter.rb b/lib/lutaml/model/xml_adapter/ox_adapter.rb
index 3609f50..70032f3 100644
--- a/lib/lutaml/model/xml_adapter/ox_adapter.rb
+++ b/lib/lutaml/model/xml_adapter/ox_adapter.rb
@@ -6,23 +6,28 @@ module Lutaml
module Model
module XmlAdapter
class OxAdapter < XmlDocument
- def self.parse(xml)
+ # Parses +xml+ with Ox and wraps the root in an adapter document that
+ # remembers the encoding used (options[:encoding], "UTF-8" when absent).
+ #
+ # The encoding is handed to Ox through the process-wide Ox.default_options,
+ # so the previous defaults are restored once parsing is done instead of
+ # being overwritten permanently. NOTE(review): the swap itself is still not
+ # atomic, so concurrent parses with different encodings can interfere.
+ def self.parse(xml, options = {})
+ encoding = options[:encoding] || "UTF-8"
+ previous_defaults = Ox.default_options
+ Ox.default_options = previous_defaults.merge(encoding: encoding)
+
parsed = Ox.parse(xml)
root = OxElement.new(parsed)
- new(root)
+ new(root, encoding)
+ ensure
+ # previous_defaults is nil only if reading the defaults itself failed.
+ Ox.default_options = previous_defaults if previous_defaults
end
def to_xml(options = {})
- builder = Builder::Ox.build
builder_options = { version: options[:version] }
- if options.key?(:encoding)
- builder_options[:encoding] = options[:encoding] unless options[:encoding].nil?
- else
- builder_options[:encoding] = "UTF-8"
- end
+ builder_options[:encoding] = if options.key?(:encoding)
+ options[:encoding]
+ elsif options.key?(:parse_encoding)
+ options[:parse_encoding]
+ else
+ "UTF-8"
+ end
+
+ builder = Builder::Ox.build
+ builder.xml.instruct(:xml, encoding: options[:parse_encoding])
- builder.xml.instruct(:xml, builder_options)
if @root.is_a?(Lutaml::Model::XmlAdapter::OxElement)
@root.build_xml(builder)
elsif ordered?(@root, options)
@@ -34,7 +39,12 @@ def to_xml(options = {})
end
xml_data = builder.xml.to_s
- options[:declaration] ? xml_data : xml_data.sub(/\A<\?xml[^>]*\?>\n?/, "")
+ if builder_options[:encoding] && xml_data.valid_encoding?
+ xml_data = xml_data.encode(builder_options[:encoding])
+ end
+
+ stripped_data = xml_data.lines.drop(1).join
+ options[:declaration] ? declaration(options) + stripped_data : stripped_data
end
private
diff --git a/lib/lutaml/model/xml_adapter/xml_document.rb b/lib/lutaml/model/xml_adapter/xml_document.rb
index 9a8052a..d7325a1 100644
--- a/lib/lutaml/model/xml_adapter/xml_document.rb
+++ b/lib/lutaml/model/xml_adapter/xml_document.rb
@@ -7,13 +7,14 @@ module Lutaml
module Model
module XmlAdapter
class XmlDocument
- attr_reader :root
+ attr_reader :root, :encoding
- def initialize(root)
+ def initialize(root, encoding = nil) # encoding is optional (nil by default) and stored for the encoding reader
@root = root
+ @encoding = encoding
end
- def self.parse(xml)
+ def self.parse(xml, _options = {}) # abstract hook; _options is accepted because callers invoke adapter.parse(data, options), unused here
raise NotImplementedError, "Subclasses must implement `parse`."
end
diff --git a/lib/lutaml/model/yaml_adapter/standard_yaml_adapter.rb b/lib/lutaml/model/yaml_adapter/standard_yaml_adapter.rb
index 7847e3f..7ff3625 100644
--- a/lib/lutaml/model/yaml_adapter/standard_yaml_adapter.rb
+++ b/lib/lutaml/model/yaml_adapter/standard_yaml_adapter.rb
@@ -14,7 +14,7 @@ class StandardYamlAdapter < YamlDocument
PERMITTED_CLASSES_BASE
end.freeze
- def self.parse(yaml)
+ def self.parse(yaml, _options = {}) # _options is accepted for the shared adapter.parse(data, options) call and ignored here
YAML.safe_load(yaml, permitted_classes: PERMITTED_CLASSES)
end
diff --git a/lib/lutaml/model/yaml_adapter/yaml_document.rb b/lib/lutaml/model/yaml_adapter/yaml_document.rb
index 62ae81b..217ffa0 100644
--- a/lib/lutaml/model/yaml_adapter/yaml_document.rb
+++ b/lib/lutaml/model/yaml_adapter/yaml_document.rb
@@ -7,7 +7,7 @@ module Model
module YamlAdapter
# Base class for YAML documents
class YamlDocument < YamlObject
- def self.parse(yaml)
+ def self.parse(yaml, _options = {}) # abstract hook; _options is accepted because callers invoke adapter.parse(data, options), unused here
raise NotImplementedError, "Subclasses must implement `parse`."
end
diff --git a/spec/fixtures/xml/latin_encoding.xml b/spec/fixtures/xml/latin_encoding.xml
new file mode 100644
index 0000000..347431d
--- /dev/null
+++ b/spec/fixtures/xml/latin_encoding.xml
@@ -0,0 +1,5 @@
+
+ Jos
+ Mller
+ Reminder
+
diff --git a/spec/fixtures/xml/shift_jis.xml b/spec/fixtures/xml/shift_jis.xml
new file mode 100644
index 0000000..52ec13d
--- /dev/null
+++ b/spec/fixtures/xml/shift_jis.xml
@@ -0,0 +1,4 @@
+
+ 菑pP
+ 123456
+
diff --git a/spec/lutaml/model/mixed_content_spec.rb b/spec/lutaml/model/mixed_content_spec.rb
index 4b66c59..9456783 100644
--- a/spec/lutaml/model/mixed_content_spec.rb
+++ b/spec/lutaml/model/mixed_content_spec.rb
@@ -72,6 +72,28 @@ class Article < Lutaml::Model::Serializable
end
end
+ class Latin < Lutaml::Model::Serializable # spec model used with the ISO-8859-1 fixture; root element is "note"
+ attribute :the, :string
+ attribute :from, :string
+ attribute :heading, :string
+
+ xml do
+ root "note"
+ map_element "to", to: :the # the "to" element is stored in the attribute named :the
+ map_element "from", to: :from
+ map_element "heading", to: :heading
+ end
+ end
+
+ class Shift < Lutaml::Model::Serializable # spec model used with the Shift_JIS fixture; root element is "root"
+ attribute :field, :string, collection: true
+
+ xml do
+ root "root"
+ map_element "FieldName", to: :field # "FieldName" elements map to the :field collection
+ end
+ end
+
class SpecialCharContentWithMixedTrue < Lutaml::Model::Serializable
attribute :content, :string
@@ -651,24 +673,185 @@ class HexCode < Lutaml::Model::Serializable
context "when encoding: nil xml" do
let(:expected_encoding_nil_nokogiri_xml) { "∑computer security∏ type of operation specified µ by an access right" }
- let(:expected_encoding_nil_ox_xml) { "\xE2\x88\x91computer security\xE2\x88\x8F type of \xE2\x80\x8B operation specified \xC2\xB5 by an access right" }
+ let(:expected_encoding_nil_ox_xml) { "∑computer security∏ type of operation specified µ by an access right" }
it "serializes special char mixed content correctly with encoding: nil to get hexcode" do
parsed = MixedContentSpec::HexCode.from_xml(xml)
serialized = parsed.to_xml(encoding: nil)
- if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter
- expected_output = expected_encoding_nil_ox_xml
- expected_output.force_encoding("ASCII-8BIT")
- else
- expected_output = expected_encoding_nil_nokogiri_xml
- end
+ expected_output = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter
+ expected_encoding_nil_ox_xml
+ else
+ expected_encoding_nil_nokogiri_xml
+ end
expect(serialized.strip).to include(expected_output)
end
end
end
end
+
+ context "when use encoding in parsing" do
+ context "when use SHIFT-JIS encoding" do
+ let(:fixture) { File.read(fixture_path("xml/shift_jis.xml"), encoding: "Shift_JIS") }
+
+ describe ".from_xml" do
+ it "verifies the encoding of file read" do
+ expect(fixture.encoding.to_s).to eq("Shift_JIS")
+ end
+
+ it "deserializes SHIFT encoded content correctly with explicit encoding option" do
+ parsed = MixedContentSpec::Shift.from_xml(fixture, encoding: "Shift_JIS")
+
+ expected_content = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter
+ "\x8E\xE8\x8F\x91\x82\xAB\x89p\x8E\x9A\x82P".force_encoding("Shift_JIS")
+ else
+ "手書き英字1"
+ end
+
+ expect(parsed.field).to include(expected_content)
+ end
+
+ it "deserializes SHIFT encoded content incorrectly without explicit encoding option" do
+ parsed = MixedContentSpec::Shift.from_xml(fixture)
+
+ expected_content = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter
+ "\x8E\xE8\x8F\x91\x82\xAB\x89p\x8E\x9A\x82P".force_encoding("UTF-8")
+ else
+ "�菑���p���P"
+ end
+
+ expect(parsed.field).to include(expected_content)
+ end
+ end
+
+ describe ".to_xml" do
+ it "serializes SHIFT-JIS encoding content correctly reading from file" do
+ parsed = MixedContentSpec::Shift.from_xml(fixture, encoding: "Shift_JIS")
+ serialized = parsed.to_xml
+
+ expect(serialized.strip).to eq(fixture.strip)
+ end
+
+ it "serializes SHIFT encoded content correctly with explicit encoding option both in parsing and deserializing" do
+ parsed = MixedContentSpec::Shift.from_xml(fixture, encoding: "Shift_JIS")
+ serialized = parsed.to_xml(encoding: "UTF-8")
+
+ expected_xml = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter
+ "\x8E\xE8\x8F\x91\x82\xAB\x89p\x8E\x9A\x82P".force_encoding("Shift_JIS")
+ else
+ "手書き英字1"
+ end
+
+ expect(parsed.field).to include(expected_xml)
+ expect(parsed.encoding).to eq("Shift_JIS")
+
+ expect(serialized).to include("手書き英字1")
+ expect(serialized.encoding.to_s).to eq("UTF-8")
+ end
+
+ it "serializes SHIFT encoded content correctly with explicit encoding option" do
+ parsed = MixedContentSpec::Shift.from_xml(fixture, encoding: "Shift_JIS")
+ serialized = parsed.to_xml(encoding: "Shift_JIS")
+
+ expected_xml = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter
+ "\x8E\xE8\x8F\x91\x82\xAB\x89p\x8E\x9A\x82P".force_encoding("Shift_JIS")
+ else
+ "手書き英字1"
+ end
+
+ expect(parsed.field).to include(expected_xml)
+ expect(parsed.encoding).to eq("Shift_JIS")
+
+ expect(serialized).to include("\x8E\xE8\x8F\x91\x82\xAB\x89p\x8E\x9A\x82P".force_encoding("Shift_JIS"))
+ expect(serialized.encoding.to_s).to eq("Shift_JIS")
+ end
+
+ it "serializes SHIFT encoded content correctly with declaration: true" do
+ parsed = MixedContentSpec::Shift.from_xml(fixture, encoding: "Shift_JIS")
+ serialized = parsed.to_xml(declaration: true, encoding: "Shift_JIS")
+
+ expected_xml = "\n\n \x8E\xE8\x8F\x91\x82\xAB\x89p\x8E\x9A\x82P\n 123456\n"
+
+ expect(serialized).to be_equivalent_to(expected_xml)
+ expect(serialized.encoding.to_s).to eq("Shift_JIS")
+ end
+
+ it "serializes SHIFT-JIS content incorrectly bcz no encoding provided during parsing" do
+ parsed = MixedContentSpec::Shift.from_xml(fixture)
+ serialized = parsed.to_xml(encoding: "Shift_JIS")
+
+ expected_content = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter
+ "\n \x8E菑\x82\xAB\x89p\x8E\x9A\x82P\n 123456\n\n"
+ else
+ "\n �菑���p���P\n 123456\n"
+ end
+
+ expect(serialized).to eq(expected_content)
+ end
+
+ it "serializes SHIFT-JIS encoding content correctly reading from string" do
+ xml = "手書き英字1123456".encode("Shift_JIS")
+ parsed = MixedContentSpec::Shift.from_xml(xml, encoding: "Shift_JIS")
+ serialized = parsed.to_xml(encoding: "Shift_JIS")
+
+ expect(serialized).to be_equivalent_to(xml)
+ end
+
+ it "serializes SHIFT-JIS encoding content correctly" do
+ parsed = MixedContentSpec::Shift.from_xml(fixture, encoding: "Shift_JIS")
+ serialized = parsed.to_xml(encoding: "Shift_JIS")
+
+ expect(serialized).to be_equivalent_to(fixture)
+ end
+ end
+ end
+
+ context "when use LATIN (ISO-8859-1) encoding" do
+ let(:fixture) { File.read(fixture_path("xml/latin_encoding.xml"), encoding: "ISO-8859-1") }
+
+ describe ".from_xml" do
+ it "verifies the encoding of file read" do
+ expect(fixture.encoding.to_s).to eq("ISO-8859-1")
+ end
+
+ it "deserializes latin encoded content correctly" do
+ parsed = MixedContentSpec::Latin.from_xml(fixture, encoding: "ISO-8859-1")
+
+ expected_content = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter
+ ["M\xFCller".force_encoding("ISO-8859-1"), "Jos\xE9".force_encoding("ISO-8859-1")]
+ else
+ ["Müller", "José"]
+ end
+
+ expect(parsed.from).to eq(expected_content[0])
+ expect(parsed.the).to eq(expected_content[1])
+ end
+
+ it "deserializes latin encoded content incorrectly" do
+ parsed = MixedContentSpec::Latin.from_xml(fixture)
+
+ expected_content = if adapter_class == Lutaml::Model::XmlAdapter::OxAdapter
+ ["M\xFCller", "Jos\xE9"]
+ else
+ ["M�ller", "Jos�"]
+ end
+
+ expect(parsed.from).to eq(expected_content[0])
+ expect(parsed.the).to eq(expected_content[1])
+ end
+ end
+
+ describe ".to_xml" do
+ it "serializes latin encoded content correctly" do
+ parsed = MixedContentSpec::Latin.from_xml(fixture, encoding: "ISO-8859-1")
+ serialized = parsed.to_xml
+
+ expect(serialized.strip).to eq("\n Jos\xE9\n M\xFCller\n Reminder\n".force_encoding("ISO-8859-1"))
+ end
+ end
+ end
+ end
end
describe Lutaml::Model::XmlAdapter::NokogiriAdapter do