diff --git a/src/JSON.cc b/src/JSON.cc index 9c008ff..63b4f7d 100644 --- a/src/JSON.cc +++ b/src/JSON.cc @@ -190,6 +190,15 @@ JSON JSON::parse(StringReader& r, bool disable_extensions) { data.push_back('\r'); } else if (ch == 't') { data.push_back('\t'); + } else if (ch == 'x') { + uint8_t value; + try { + value = value_for_hex_char(r.get_s8()) << 4; + value |= value_for_hex_char(r.get_s8()); + } catch (const out_of_range&) { + throw parse_error("incomplete hex escape sequence in string; pos=" + to_string(r.where())); + } + data.push_back(value); } else if (ch == 'u') { uint16_t value; try { @@ -204,6 +213,7 @@ JSON JSON::parse(StringReader& r, bool disable_extensions) { if (value & 0xFF00) { throw parse_error("non-ascii unicode character sequence in string; pos=" + to_string(r.where())); } + data.push_back(value); } else { throw parse_error("invalid escape sequence in string; pos=" + to_string(r.where())); } @@ -245,7 +255,7 @@ JSON JSON::parse(const string& s, bool disable_extensions) { return JSON::parse(s.data(), s.size(), disable_extensions); } -string escape_json_string(const string& s) { +string escape_json_string(const string& s, bool use_hex_escapes) { string ret; for (auto ch : s) { if (ch == '\"') { @@ -263,7 +273,11 @@ string escape_json_string(const string& s) { } else if (ch == '\t') { ret += "\\t"; } else if ((ch < 0x20) || (ch > 0x7E)) { - ret += string_printf("\\u%04hhX", ch); + if (use_hex_escapes) { + ret += string_printf("\\x%02hhX", ch); + } else { + ret += string_printf("\\u%04hhX", ch); + } } else { ret += ch; } @@ -272,6 +286,8 @@ string escape_json_string(const string& s) { } string JSON::serialize(uint32_t options, size_t indent_level) const { + bool use_hex_escapes = options & SerializeOption::HEX_ESCAPE_CODES; + size_t type_index = this->value.index(); switch (type_index) { case 0: // nullptr_t @@ -302,7 +318,7 @@ string JSON::serialize(uint32_t options, size_t indent_level) const { } case 4: // string - return "\"" + 
escape_json_string(this->as_string()) + "\""; + return "\"" + escape_json_string(this->as_string(), use_hex_escapes) + "\""; case 5: { // list_type bool format = options & SerializeOption::FORMAT; @@ -345,9 +361,9 @@ string JSON::serialize(uint32_t options, size_t indent_level) const { ret += ','; } if (format) { - ret += '\n' + string(indent_level + 2, ' ') + "\"" + escape_json_string(key) + "\": " + value.serialize(options, indent_level + 2); + ret += '\n' + string(indent_level + 2, ' ') + "\"" + escape_json_string(key, use_hex_escapes) + "\": " + value.serialize(options, indent_level + 2); } else { - ret += "\"" + escape_json_string(key) + "\":" + value.serialize(options); + ret += "\"" + escape_json_string(key, use_hex_escapes) + "\":" + value.serialize(options); } }; diff --git a/src/JSON.hh b/src/JSON.hh index 0427d86..e28f805 100644 --- a/src/JSON.hh +++ b/src/JSON.hh @@ -12,7 +12,7 @@ #include "Strings.hh" #include "Types.hh" -std::string escape_json_string(const std::string& s); +std::string escape_json_string(const std::string& s, bool use_hex_escapes = false); class JSON { public: @@ -138,22 +138,26 @@ public: // This option adds whitespace and line breaks to the output to make it // easier for humans to read. The output is still standard-compliant. - FORMAT = 4, + FORMAT = 0x04, // If this is enabled, all integers are serialized in hexadecimal. This is // not standard-compliant, but JSON::parse can parse output generated with // this option if disable_extensions is false (the default). - HEX_INTEGERS = 1, + HEX_INTEGERS = 0x01, // If this is enabled, null, true, and false are serialized as single // characters (n, t, and f). This is not standard-compliant, but JSON::parse // can parse output generated with this option if disable_extensions is // false (the default). - ONE_CHARACTER_TRIVIAL_CONSTANTS = 2, + ONE_CHARACTER_TRIVIAL_CONSTANTS = 0x02, // If this is enabled, keys in dictionaries are sorted. 
If not enabled, // keys are serialized in the order they're stored, which is arbitrary. // Sorting takes a bit of extra time and memory, so if the resulting JSON // isn't expected to be read by a human, it's often not worth it. When this // is enabled, the output is still standard-compliant. - SORT_DICT_KEYS = 8, + SORT_DICT_KEYS = 0x08, + // If this is enabled, bytes in strings that would otherwise be written as + // \u escapes (control characters and non-ASCII bytes) are written in the + // shorter \x form instead. This is not standard-compliant. + HEX_ESCAPE_CODES = 0x10, }; std::string serialize(uint32_t options = 0, size_t indent_level = 0) const;