Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update RegressionTest to assume UTF-8. #1006

Merged
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 62 additions & 6 deletions src/main/java/emissary/core/IBaseDataObjectXmlCodecs.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PushbackReader;
import java.io.Reader;
import java.lang.reflect.Method;
import java.nio.charset.StandardCharsets;
import java.util.AbstractMap;
Expand Down Expand Up @@ -535,11 +540,11 @@ public void encode(final List<SeekableByteChannelFactory> values, final Element
private static Element protectedElementHash(final String name, final byte[] bytes) {
final Element element = new Element(name);

if (ByteUtil.hasNonPrintableValues(bytes)) {
if (requiresEncoding(bytes)) {
element.setAttribute(ENCODING_ATTRIBUTE_NAME, SHA256);
element.addContent(ByteUtil.sha256Bytes(bytes));
} else {
element.addContent(new String(bytes, StandardCharsets.ISO_8859_1));
element.addContent(new String(bytes, StandardCharsets.UTF_8));
}

return element;
Expand Down Expand Up @@ -777,15 +782,15 @@ public static Element protectedElement(final String name, final String string) {
public static Element protectedElementBase64(final String name, final byte[] bytes) {
final Element element = new Element(name);

if (ByteUtil.hasNonPrintableValues(bytes)) {
if (requiresEncoding(bytes)) {
String base64String = BASE64_NEW_LINE_STRING +
BASE64_ENCODER.encodeToString(bytes) +
BASE64_NEW_LINE_STRING;

element.setAttribute(ENCODING_ATTRIBUTE_NAME, BASE64);
element.addContent(base64String);
} else {
element.addContent(new String(bytes, StandardCharsets.ISO_8859_1));
element.addContent(new String(bytes, StandardCharsets.UTF_8));
}

return element;
Expand All @@ -803,11 +808,11 @@ public static Element protectedElementBase64(final String name, final byte[] byt
public static Element protectedElementSha256(final String name, final byte[] bytes) {
final Element element = new Element(name);

if (ByteUtil.hasNonPrintableValues(bytes)) {
if (requiresEncoding(bytes)) {
element.setAttribute(IBaseDataObjectXmlCodecs.ENCODING_ATTRIBUTE_NAME, IBaseDataObjectXmlCodecs.SHA256);
element.addContent(ByteUtil.sha256Bytes(bytes));
} else {
element.addContent(new String(bytes, StandardCharsets.ISO_8859_1));
element.addContent(new String(bytes, StandardCharsets.UTF_8));
}

return element;
Expand All @@ -826,4 +831,55 @@ public static Method getIbdoMethod(final String methodName, final Class<?>... pa
throws NoSuchMethodException {
return IBaseDataObject.class.getMethod(methodName, parameterTypes);
}

public static boolean requiresEncoding(final byte[] utf8Bytes) {
jdcove2 marked this conversation as resolved.
Show resolved Hide resolved
jdcove2 marked this conversation as resolved.
Show resolved Hide resolved
try (ByteArrayInputStream bais = new ByteArrayInputStream(utf8Bytes);
Reader r = new InputStreamReader(bais, StandardCharsets.UTF_8)) {
return requiresEncoding(r);
} catch (IOException e) {
LOGGER.warn("Could not read UTF-8 bytes!", e);
}

return true;
}

/**
 * Scans character data and reports whether it contains anything that should not be written
 * out as plain text.
 *
 * <p>
 * A code point triggers encoding when it is either
 * <ul>
 * <li>in one of the non-printable ranges listed below (control characters, invisible
 * formatting/space characters, the byte order mark, or U+FFFD, which marks a UTF-8 decoding
 * error), or</li>
 * <li>not a legal XML 1.0 character: vertical tab (U+000B), form feed (U+000C), an unpaired
 * surrogate, U+FFFE or U+FFFF. Callers in this class place un-encoded text directly into an
 * XML element, so such characters must take the encoded path instead.
 * NOTE(review): this assumes the element implementation rejects illegal XML characters (as
 * JDOM does) - confirm against the Element type used by this class.</li>
 * </ul>
 *
 * <p>
 * The supplied reader is wrapped and closed by this method.
 *
 * @param reader source of the characters to inspect; it is closed before this method returns
 * @return {@code true} if at least one code point requires encoding, {@code false} otherwise
 * @throws IOException if reading from (or closing) the reader fails
 */
// https://stackoverflow.com/questions/3770117/what-is-the-range-of-unicode-printable-characters
public static boolean requiresEncoding(final Reader reader) throws IOException {
    try (BufferedReader bufferedReader = new BufferedReader(reader);
            PushbackReader pushbackReader = new PushbackReader(bufferedReader)) {
        int codepoint;
        while ((codepoint = nextCodepoint(pushbackReader)) != -1) {
            if (('\u0000' <= codepoint && codepoint <= '\u0008') ||
                    // Vertical tab and form feed are whitespace but are not legal XML 1.0 characters.
                    (0x000B <= codepoint && codepoint <= 0x000C) ||
                    ('\u000E' <= codepoint && codepoint <= '\u001F') ||
                    ('\u007F' <= codepoint && codepoint <= '\u009F') ||
                    ('\u2000' <= codepoint && codepoint <= '\u200F') ||
                    ('\u2028' <= codepoint && codepoint <= '\u202F') ||
                    ('\u205F' <= codepoint && codepoint <= '\u206F') ||
                    codepoint == '\u3000' || codepoint == '\uFEFF' ||
                    // nextCodepoint only returns a surrogate value when it is unpaired.
                    (Character.MIN_SURROGATE <= codepoint && codepoint <= Character.MAX_SURROGATE) ||
                    // U+FFFD is the UTF-8 error replacement character;
                    // U+FFFE and U+FFFF are non-characters that XML 1.0 forbids.
                    (0xFFFD <= codepoint && codepoint <= 0xFFFF)) {
                return true;
            }
        }
    }

    return false;
}

/**
 * Reads the next Unicode code point from a stream of UTF-16 code units.
 *
 * <p>
 * A high surrogate immediately followed by a low surrogate is combined into a single
 * supplementary code point. A high surrogate that is not followed by a low surrogate is
 * returned as-is, and the character read after it (if any) is pushed back so that the next
 * call sees it.
 *
 * @param pushbackReader the reader to consume from; needs room to push back one character
 * @return the next code point, or {@code -1} if the end of the stream has been reached
 * @throws IOException if reading from or pushing back to the reader fails
 */
public static int nextCodepoint(final PushbackReader pushbackReader) throws IOException {
    final int first = pushbackReader.read();

    // End of stream, or an ordinary (non-leading-surrogate) character: nothing to combine.
    if (first == -1 || !Character.isHighSurrogate((char) first)) {
        return first;
    }

    final int second = pushbackReader.read();
    if (second == -1) {
        // Stream ended right after the high surrogate.
        return first;
    }

    if (Character.isLowSurrogate((char) second)) {
        return Character.toCodePoint((char) first, (char) second);
    }

    // Not a valid pair: leave the second character for the next call.
    pushbackReader.unread(second);
    return first;
}
}
28 changes: 14 additions & 14 deletions src/test/java/emissary/core/IBaseDataObjectXmlHelperTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ private static void setAllFieldsPrintable(final IBaseDataObject ibdo, final byte
ibdo.setFilename("Filename");
ibdo.setFileType("FileType");
ibdo.setFontEncoding("FontEncoding");
ibdo.setFooter("Footer".getBytes(StandardCharsets.ISO_8859_1));
ibdo.setHeader("Header".getBytes(StandardCharsets.ISO_8859_1));
ibdo.setFooter("Footer".getBytes(StandardCharsets.UTF_8));
ibdo.setHeader("Header".getBytes(StandardCharsets.UTF_8));
ibdo.setHeaderEncoding("HeaderEncoding");
ibdo.setId("Id");
ibdo.setNumChildren(9);
Expand All @@ -84,8 +84,8 @@ private static void setAllFieldsPrintable(final IBaseDataObject ibdo, final byte
ibdo.putParameter("Parameter1Key", "Parameter1Value");
ibdo.putParameter("Parameter2Key", Arrays.asList("Parameter2Value1", "Parameter2Value2"));
ibdo.putParameter("Parameter3Key", Arrays.asList(10L, 20L));
ibdo.addAlternateView("AlternateView1Key", "AlternateView1Value".getBytes(StandardCharsets.ISO_8859_1));
ibdo.addAlternateView("AlternateView11Key", "AlternateView11Value".getBytes(StandardCharsets.ISO_8859_1));
ibdo.addAlternateView("AlternateView1Key", "AlternateView1Value".getBytes(StandardCharsets.UTF_8));
ibdo.addAlternateView("AlternateView11Key", "AlternateView11Value".getBytes(StandardCharsets.UTF_8));
}

private static void setAllFieldsNonPrintable(final IBaseDataObject ibdo, final byte[] bytes) {
Expand All @@ -99,8 +99,8 @@ private static void setAllFieldsNonPrintable(final IBaseDataObject ibdo, final b
ibdo.setFilename("\001Filename");
ibdo.setFileType("\001FileType");
ibdo.setFontEncoding("\001FontEncoding");
ibdo.setFooter("\001Footer".getBytes(StandardCharsets.ISO_8859_1));
ibdo.setHeader("\001Header".getBytes(StandardCharsets.ISO_8859_1));
ibdo.setFooter("\001Footer".getBytes(StandardCharsets.UTF_8));
ibdo.setHeader("\001Header".getBytes(StandardCharsets.UTF_8));
ibdo.setHeaderEncoding("\001HeaderEncoding");
ibdo.setId("\001Id");
ibdo.setNumChildren(9);
Expand All @@ -114,9 +114,9 @@ private static void setAllFieldsNonPrintable(final IBaseDataObject ibdo, final b
ibdo.putParameter("\020Parameter1Key", "\020Parameter1Value");
ibdo.putParameter("\020Parameter2Key", "\020Parameter2Value");
ibdo.addAlternateView("\200AlternateView1Key",
"\200AlternateView1Value".getBytes(StandardCharsets.ISO_8859_1));
"\200AlternateView1Value".getBytes(StandardCharsets.UTF_8));
ibdo.addAlternateView("\200AlternateView11Key",
"\200AlternateView11Value".getBytes(StandardCharsets.ISO_8859_1));
"\200AlternateView11Value".getBytes(StandardCharsets.UTF_8));
}

@Test
Expand All @@ -125,7 +125,7 @@ void testParentIbdoAllFieldsChanged() throws Exception {
final IBaseDataObject expectedIbdo = new BaseDataObject();
final List<IBaseDataObject> expectedChildren = new ArrayList<>();
final List<IBaseDataObject> actualChildren = new ArrayList<>();
final byte[] bytes = "Data".getBytes(StandardCharsets.ISO_8859_1);
final byte[] bytes = "Data".getBytes(StandardCharsets.UTF_8);

setAllFieldsPrintable(expectedIbdo, bytes);

Expand All @@ -150,7 +150,7 @@ void testBase64Conversion() throws Exception {
final IBaseDataObject expectedIbdo = new BaseDataObject();
final List<IBaseDataObject> expectedChildren = new ArrayList<>();
final List<IBaseDataObject> actualChildren = new ArrayList<>();
final byte[] bytes = "\001Data".getBytes(StandardCharsets.ISO_8859_1);
final byte[] bytes = "\001Data".getBytes(StandardCharsets.UTF_8);

setAllFieldsNonPrintable(expectedIbdo, bytes);

Expand All @@ -164,10 +164,10 @@ void testBase64Conversion() throws Exception {
final IBaseDataObject sha256ActualIbdo = ibdoFromXmlFromIbdo(expectedIbdo, expectedChildren, initialIbdo,
actualChildren, SHA256_ELEMENT_ENCODERS);

expectedIbdo.setData(ByteUtil.sha256Bytes(bytes).getBytes(StandardCharsets.ISO_8859_1));
expectedIbdo.setData(ByteUtil.sha256Bytes(bytes).getBytes(StandardCharsets.UTF_8));

for (Entry<String, byte[]> entry : new TreeMap<>(expectedIbdo.getAlternateViews()).entrySet()) {
expectedIbdo.addAlternateView(entry.getKey(), ByteUtil.sha256Bytes(entry.getValue()).getBytes(StandardCharsets.ISO_8859_1));
expectedIbdo.addAlternateView(entry.getKey(), ByteUtil.sha256Bytes(entry.getValue()).getBytes(StandardCharsets.UTF_8));
}

final String sha256Diff = PlaceComparisonHelper.checkDifferences(expectedIbdo, sha256ActualIbdo, expectedChildren,
Expand All @@ -180,7 +180,7 @@ void testBase64Conversion() throws Exception {
void testLengthAttributeDefault() throws Exception {
final IBaseDataObject ibdo = new BaseDataObject();
final List<IBaseDataObject> children = new ArrayList<>();
final byte[] bytes = "Data".getBytes(StandardCharsets.ISO_8859_1);
final byte[] bytes = "Data".getBytes(StandardCharsets.UTF_8);

setAllFieldsPrintable(ibdo, bytes);

Expand All @@ -201,7 +201,7 @@ void testLengthAttributeDefault() throws Exception {
void testLengthAttributeHash() throws Exception {
final IBaseDataObject ibdo = new BaseDataObject();
final List<IBaseDataObject> children = new ArrayList<>();
final byte[] bytes = "Data".getBytes(StandardCharsets.ISO_8859_1);
final byte[] bytes = "Data".getBytes(StandardCharsets.UTF_8);

setAllFieldsNonPrintable(ibdo, bytes);

Expand Down
10 changes: 4 additions & 6 deletions src/test/java/emissary/test/core/junit5/RegressionTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ protected void checkAnswersPreHook(final Document answers, final IBaseDataObject
// touch up alternate views to match how their bytes would have encoded into the answer file
for (Entry<String, byte[]> entry : new TreeMap<>(payload.getAlternateViews()).entrySet()) {
Optional<String> viewSha256 = hashBytesIfNonPrintable(entry.getValue());
viewSha256.ifPresent(s -> payload.addAlternateView(entry.getKey(), s.getBytes(StandardCharsets.ISO_8859_1)));
viewSha256.ifPresent(s -> payload.addAlternateView(entry.getKey(), s.getBytes(StandardCharsets.UTF_8)));
}

// touch up primary view if necessary
Expand All @@ -198,10 +198,8 @@ protected void checkAnswersPreHook(final Document answers, final IBaseDataObject

if (attachments != null) {
for (final IBaseDataObject attachment : attachments) {
if (ByteUtil.hasNonPrintableValues(attachment.data())) {
Optional<String> attachmentSha256 = hashBytesIfNonPrintable(attachment.data());
attachmentSha256.ifPresent(s -> attachment.setData(s.getBytes(StandardCharsets.UTF_8)));
}
Optional<String> attachmentSha256 = hashBytesIfNonPrintable(attachment.data());
attachmentSha256.ifPresent(s -> attachment.setData(s.getBytes(StandardCharsets.UTF_8)));
}
}
}
Expand Down Expand Up @@ -233,7 +231,7 @@ protected void checkAnswersPreHookLogEvents(List<SimplifiedLogEvent> simplifiedL
* @return a value optionally containing the generated hash
*/
protected Optional<String> hashBytesIfNonPrintable(byte[] bytes) {
if (ArrayUtils.isNotEmpty(bytes) && ByteUtil.hasNonPrintableValues(bytes)) {
if (ArrayUtils.isNotEmpty(bytes) && IBaseDataObjectXmlCodecs.requiresEncoding(bytes)) {
return Optional.ofNullable(ByteUtil.sha256Bytes(bytes));
}

Expand Down