Skip to content

Commit

Permalink
Fix issue 1098: HighSurrogate can be the last char in the data window
Browse files Browse the repository at this point in the history
  • Loading branch information
asomov committed Sep 17, 2024
1 parent ebcfd6c commit d82139c
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 20 deletions.
8 changes: 4 additions & 4 deletions src/main/java/org/yaml/snakeyaml/reader/StreamReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -175,19 +175,19 @@ private boolean ensureEnoughData(int size) {

private void update() {
try {
int read = stream.read(buffer, 0, buffer.length - 1); // FIXME why -1 ???
// read one char less than the buffer holds: if the last char read is a HighSurrogate,
// the spare slot is used to read its LowSurrogate
int read = stream.read(buffer, 0, buffer.length - 1);
if (read > 0) {
int cpIndex = (dataLength - pointer);
dataWindow = Arrays.copyOfRange(dataWindow, pointer, dataLength + read);

if (Character.isHighSurrogate(buffer[read - 1])) {
if (stream.read(buffer, read, 1) == -1) {
eof = true;
throw new ReaderException(name, index + read, buffer[read - 1],
"The last char is HighSurrogate (no LowSurrogate detected).");
} else {
read++;
}
}

int nonPrintable = ' ';
for (int i = 0; i < read; cpIndex++) {
int codePoint = Character.codePointAt(buffer, i);
Expand Down
52 changes: 36 additions & 16 deletions src/test/java/org/yaml/snakeyaml/reader/ReaderStringTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
*/
package org.yaml.snakeyaml.reader;

import java.io.StringReader;
import junit.framework.TestCase;

import java.io.StringReader;

public class ReaderStringTest extends TestCase {

public void testCheckPrintable() {
Expand All @@ -40,25 +41,45 @@ public void testCheckNonPrintable() {
}

/**
* test that Reading date and checking String work the same
* test reading all the chars
*/
public void testCheckAll() {
int counterSurrogates = 0;
for (char i = 0; i < 256 * 256 - 1; i++) {
char[] chars = new char[1];
chars[0] = i;
String str = new String(chars);
boolean regularExpressionResult = StreamReader.isPrintable(str);
if (Character.isHighSurrogate(i)) {
counterSurrogates++;
} else {
char[] chars = new char[1];
chars[0] = i;
String str = new String(chars);
boolean regularExpressionResult = StreamReader.isPrintable(str);

boolean charsArrayResult = true;
try {
new StreamReader(new StringReader(str)).peek();
} catch (Exception e) {
String error = e.getMessage();
assertTrue(error, error.startsWith("unacceptable character")
|| error.equals("special characters are not allowed"));
charsArrayResult = false;
}
assertEquals("Failed for #" + i, regularExpressionResult, charsArrayResult);
}
}
// https://en.wikipedia.org/wiki/Universal_Character_Set_characters
assertEquals("There are 1024 high surrogates (D800–DBFF)", 1024, counterSurrogates);
}

boolean charsArrayResult = true;
try {
new StreamReader(new StringReader(str)).peek();
} catch (Exception e) {
String error = e.getMessage();
assertTrue(error, error.startsWith("unacceptable character")
|| error.equals("special characters are not allowed"));
charsArrayResult = false;
public void testHighSurrogateAlone() {
StreamReader reader = new StreamReader("test\uD800");
try {
while (reader.peek() > 0) {
reader.forward(1);
}
assertEquals("Failed for #" + i, regularExpressionResult, charsArrayResult);
} catch (ReaderException e) {
assertTrue(e.toString()
.contains("(0xD800) The last char is HighSurrogate (no LowSurrogate detected)"));
assertEquals(5, e.getPosition());
}
}

Expand Down Expand Up @@ -90,5 +111,4 @@ public void testPeekInt() {
assertEquals('s', reader.peek(1));
assertEquals('t', reader.peek(2));
}

}

0 comments on commit d82139c

Please sign in to comment.