From 18cc09b4839a77c1ce237df43e8262e9698996e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Kr=C3=BCgler?= Date: Fri, 3 May 2024 17:59:17 +0200 Subject: [PATCH] Add support for Java string Unicode supplementary plane codepoints to the two StreamWriter classes --- .../runtime/impl/ISO88591StreamWriter.java | 21 ++++++---- .../jibx/runtime/impl/UTF8StreamWriter.java | 42 +++++++++++++++---- 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/build/src/org/jibx/runtime/impl/ISO88591StreamWriter.java b/build/src/org/jibx/runtime/impl/ISO88591StreamWriter.java index f03ac61..0874917 100644 --- a/build/src/org/jibx/runtime/impl/ISO88591StreamWriter.java +++ b/build/src/org/jibx/runtime/impl/ISO88591StreamWriter.java @@ -169,11 +169,12 @@ protected void defineNamespace(int index, String prefix) * @throws IOException if error writing to document */ protected void writeAttributeText(String text) throws IOException { - int length = text.length(); + final int length = text.length(); makeSpace(length * 6); int fill = m_fillOffset; - for (int i = 0; i < length; i++) { - char chr = text.charAt(i); + int chr; + for (int i = 0; i < length; i += Character.charCount(chr)) { + chr = text.codePointAt(i); if (chr == '"') { fill = writeEntity(m_quotEntityBytes, fill); } else if (chr == '&') { @@ -232,11 +233,12 @@ protected void writeAttributeText(String text) throws IOException { */ public void writeTextContent(String text) throws IOException { flagTextContent(); - int length = text.length(); + final int length = text.length(); makeSpace(length * 5); int fill = m_fillOffset; - for (int i = 0; i < length; i++) { - char chr = text.charAt(i); + int chr; + for (int i = 0; i < length; i += Character.charCount(chr)) { + chr = text.codePointAt(i); if (chr == '&') { fill = writeEntity(m_ampEntityBytes, fill); } else if (chr == '<') { @@ -291,12 +293,13 @@ public void writeTextContent(String text) throws IOException { */ public void writeCData(String text) throws IOException { flagTextContent(); - int length = text.length(); + final int length = text.length(); makeSpace(length + 12); int fill = m_fillOffset; fill = writeEntity(m_cdataStartBytes, fill); - for (int i = 0; i < length; i++) { - char chr = text.charAt(i); + int chr; + for (int i = 0; i < length; i += Character.charCount(chr)) { + chr = text.codePointAt(i); if (chr == '>' && i > 2 && text.charAt(i-1) == ']' && text.charAt(i-2) == ']') { throw new IOException("Sequence \"]]>\" is not allowed " + diff --git a/build/src/org/jibx/runtime/impl/UTF8StreamWriter.java b/build/src/org/jibx/runtime/impl/UTF8StreamWriter.java index 45b7e1a..873827a 100644 --- a/build/src/org/jibx/runtime/impl/UTF8StreamWriter.java +++ b/build/src/org/jibx/runtime/impl/UTF8StreamWriter.java @@ -187,11 +187,12 @@ protected void defineNamespace(int index, String prefix) { * @throws IOException if error writing to document */ protected void writeAttributeText(String text) throws IOException { - int length = text.length(); + final int length = text.length(); makeSpace(length * 6); int fill = m_fillOffset; - for (int i = 0; i < length; i++) { - char chr = text.charAt(i); + int chr; + for (int i = 0; i < length; i += Character.charCount(chr)) { + chr = text.codePointAt(i); if (chr == '"') { fill = writeEntity(m_quotEntityBytes, fill); } else if (chr == '&') { @@ -218,6 +219,13 @@ protected void writeAttributeText(String text) throws IOException { throw new IOException("Illegal character code 0x" + Integer.toHexString(chr) + " in attribute value text"); + } else if (Character.isSupplementaryCodePoint(chr)) { + m_buffer[fill++] = (byte)(0xF0 + ((chr >> 18))); + m_buffer[fill++] = + (byte)(0x80 + ((chr >> 12) & 0x3F)); + m_buffer[fill++] = + (byte)(0x80 + ((chr >> 6) & 0x3F)); + m_buffer[fill++] = (byte)(0x80 + (chr & 0x3F)); } else { m_buffer[fill++] = (byte)(0xE0 + (chr >> 12)); m_buffer[fill++] = @@ -244,11 +252,12 @@ protected void writeAttributeText(String text) throws IOException { */ public void writeTextContent(String text) throws IOException { flagTextContent(); - int length = text.length(); + final int length = text.length(); makeSpace(length * 5); int fill = m_fillOffset; - for (int i = 0; i < length; i++) { - char chr = text.charAt(i); + int chr; + for (int i = 0; i < length; i += Character.charCount(chr)) { + chr = text.codePointAt(i); if (chr == '&') { fill = writeEntity(m_ampEntityBytes, fill); } else if (chr == '<') { @@ -270,6 +279,13 @@ public void writeTextContent(String text) throws IOException { chr == 0xFFFF || chr > 0x10FFFF)) { throw new IOException("Illegal character code 0x" + Integer.toHexString(chr) + " in content text"); + } else if (Character.isSupplementaryCodePoint(chr)) { + m_buffer[fill++] = (byte)(0xF0 + ((chr >> 18))); + m_buffer[fill++] = + (byte)(0x80 + ((chr >> 12) & 0x3F)); + m_buffer[fill++] = + (byte)(0x80 + ((chr >> 6) & 0x3F)); + m_buffer[fill++] = (byte)(0x80 + (chr & 0x3F)); } else { m_buffer[fill++] = (byte)(0xE0 + (chr >> 12)); m_buffer[fill++] = @@ -296,12 +312,13 @@ public void writeTextContent(String text) throws IOException { */ public void writeCData(String text) throws IOException { flagTextContent(); - int length = text.length(); + final int length = text.length(); makeSpace(length * 3 + 12); int fill = m_fillOffset; fill = writeEntity(m_cdataStartBytes, fill); - for (int i = 0; i < length; i++) { - char chr = text.charAt(i); + int chr; + for (int i = 0; i < length; i += Character.charCount(chr)) { + chr = text.codePointAt(i); if (chr == '>' && i > 2 && text.charAt(i-1) == ']' && text.charAt(i-2) == ']') { throw new IOException("Sequence \"]]>\" is not allowed " + @@ -321,6 +338,13 @@ public void writeCData(String text) throws IOException { throw new IOException("Illegal character code 0x" + Integer.toHexString(chr) + " in CDATA section text"); + } else if (Character.isSupplementaryCodePoint(chr)) { + m_buffer[fill++] = (byte)(0xF0 + ((chr >> 18))); + m_buffer[fill++] = + (byte)(0x80 + ((chr >> 12) & 0x3F)); + m_buffer[fill++] = + (byte)(0x80 + ((chr >> 6) & 0x3F)); + m_buffer[fill++] = (byte)(0x80 + (chr & 0x3F)); } else { m_buffer[fill++] = (byte)(0xE0 + (chr >> 12)); m_buffer[fill++] =