Skip to content

Commit

Permalink
fix unicode escapes in JSON strings
Browse files Browse the repository at this point in the history
  • Loading branch information
fuzziqersoftware committed Dec 30, 2023
1 parent e0ed30c commit ddd3e54
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 10 deletions.
26 changes: 21 additions & 5 deletions src/JSON.cc
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,15 @@ JSON JSON::parse(StringReader& r, bool disable_extensions) {
data.push_back('\r');
} else if (ch == 't') {
data.push_back('\t');
} else if (ch == 'x') {
uint8_t value;
try {
value = value_for_hex_char(r.get_s8()) << 4;
value |= value_for_hex_char(r.get_s8());
} catch (const out_of_range&) {
throw parse_error("incomplete hex escape sequence in string; pos=" + to_string(r.where()));
}
data.push_back(value);
} else if (ch == 'u') {
uint16_t value;
try {
Expand All @@ -204,6 +213,7 @@ JSON JSON::parse(StringReader& r, bool disable_extensions) {
if (value & 0xFF00) {
throw parse_error("non-ascii unicode character sequence in string; pos=" + to_string(r.where()));
}
data.push_back(value);
} else {
throw parse_error("invalid escape sequence in string; pos=" + to_string(r.where()));
}
Expand Down Expand Up @@ -245,7 +255,7 @@ JSON JSON::parse(const string& s, bool disable_extensions) {
return JSON::parse(s.data(), s.size(), disable_extensions);
}

string escape_json_string(const string& s) {
string escape_json_string(const string& s, bool use_hex_escapes) {
string ret;
for (auto ch : s) {
if (ch == '\"') {
Expand All @@ -263,7 +273,11 @@ string escape_json_string(const string& s) {
} else if (ch == '\t') {
ret += "\\t";
} else if ((ch < 0x20) || (ch > 0x7E)) {
ret += string_printf("\\u%04hhX", ch);
if (use_hex_escapes) {
ret += string_printf("\\x%02hhX", ch);
} else {
ret += string_printf("\\u%04hhX", ch);
}
} else {
ret += ch;
}
Expand All @@ -272,6 +286,8 @@ string escape_json_string(const string& s) {
}

string JSON::serialize(uint32_t options, size_t indent_level) const {
bool use_hex_escapes = options & SerializeOption::HEX_ESCAPE_CODES;

size_t type_index = this->value.index();
switch (type_index) {
case 0: // nullptr_t
Expand Down Expand Up @@ -302,7 +318,7 @@ string JSON::serialize(uint32_t options, size_t indent_level) const {
}

case 4: // string
return "\"" + escape_json_string(this->as_string()) + "\"";
return "\"" + escape_json_string(this->as_string(), use_hex_escapes) + "\"";

case 5: { // list_type
bool format = options & SerializeOption::FORMAT;
Expand Down Expand Up @@ -345,9 +361,9 @@ string JSON::serialize(uint32_t options, size_t indent_level) const {
ret += ',';
}
if (format) {
ret += '\n' + string(indent_level + 2, ' ') + "\"" + escape_json_string(key) + "\": " + value.serialize(options, indent_level + 2);
ret += '\n' + string(indent_level + 2, ' ') + "\"" + escape_json_string(key, use_hex_escapes) + "\": " + value.serialize(options, indent_level + 2);
} else {
ret += "\"" + escape_json_string(key) + "\":" + value.serialize(options);
ret += "\"" + escape_json_string(key, use_hex_escapes) + "\":" + value.serialize(options);
}
};

Expand Down
14 changes: 9 additions & 5 deletions src/JSON.hh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include "Strings.hh"
#include "Types.hh"

std::string escape_json_string(const std::string& s);
std::string escape_json_string(const std::string& s, bool use_hex_escapes = false);

class JSON {
public:
Expand Down Expand Up @@ -138,22 +138,26 @@ public:

// This option adds whitespace and line breaks to the output to make it
// easier for humans to read. The output is still standard-compliant.
FORMAT = 4,
FORMAT = 0x04,
// If this is enabled, all integers are serialized in hexadecimal. This is
// not standard-compliant, but JSON::parse can parse output generated with
// this option if disable_extensions is false (the default).
HEX_INTEGERS = 1,
HEX_INTEGERS = 0x01,
// If this is enabled, null, true, and false are serialized as single
// characters (n, t, and f). This is not standard-compliant, but JSON::parse
// can parse output generated with this option if disable_extensions is
// false (the default).
ONE_CHARACTER_TRIVIAL_CONSTANTS = 2,
ONE_CHARACTER_TRIVIAL_CONSTANTS = 0x02,
// If this is enabled, keys in dictionaries are sorted. If not enabled,
// keys are serialized in the order they're stored, which is arbitrary.
// Sorting takes a bit of extra time and memory, so if the resulting JSON
// isn't expected to be read by a human, it's often not worth it. When this
// is enabled, the output is still standard-compliant.
SORT_DICT_KEYS = 8,
SORT_DICT_KEYS = 0x08,
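// If this is enabled, bytes in strings that fall outside the printable ASCII
// range (below 0x20 or above 0x7E) and have no dedicated escape such as \t
// are encoded in the shorter \xNN form rather than the \u00NN form. This is
// not standard-compliant, but JSON::parse accepts \x escape sequences.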
HEX_ESCAPE_CODES = 0x10,
};
std::string serialize(uint32_t options = 0, size_t indent_level = 0) const;

Expand Down

0 comments on commit ddd3e54

Please sign in to comment.