From 61f2e8d45b4e43f7aa898a1ba0f872beccb9775c Mon Sep 17 00:00:00 2001 From: Feotov Daniil Date: Mon, 7 Apr 2014 17:38:11 +0400 Subject: [PATCH 1/4] Fixed error in ucs_to_utf --- c_src/jsonx_str.h | 2 +- test/str_tests.erl | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/c_src/jsonx_str.h b/c_src/jsonx_str.h index 606eb8d..50dde5d 100644 --- a/c_src/jsonx_str.h +++ b/c_src/jsonx_str.h @@ -103,7 +103,7 @@ ucs_to_utf8(unsigned char* ptr, unsigned ucs){ *(ptr++) = (unsigned char) 0xC0 + (ucs >> 6); *(ptr++) = (unsigned char) 0x80 + (ucs & 0x3F); return ptr; - }else if(ucs < 0x1000) { + }else if(ucs < 0x10000) { // 1110xxxx 10xyyyyy 10yyyyyy if(ucs < 0xD800 || (ucs > 0xDFFF && ucs < 0xFFFE)) { *(ptr++) = (unsigned char) 0xE0 + (ucs >> 12); diff --git a/test/str_tests.erl b/test/str_tests.erl index ba8eac1..36aa7df 100644 --- a/test/str_tests.erl +++ b/test/str_tests.erl @@ -33,6 +33,8 @@ encutf4_test() -> encutf5_test() -> {no_match,<<248,128,128,128,128>>} = jsonx:encode(<<248, 128, 128, 128, 128>>). +decutf_test() -> + <<237,129,172,235,166,176,236,138,164,237,139,177>> = jsonx:decode(<<"\"\\ud06c\\ub9b0\\uc2a4\\ud2f1\"">>). 
%% Test decode atoms dectrue_test() -> From d5b22478871d5e8e5afeba90ac425f70d8b41f81 Mon Sep 17 00:00:00 2001 From: Feotov Daniil Date: Thu, 10 Apr 2014 11:52:10 +0400 Subject: [PATCH 2/4] Parse utf16 surrogate pairs --- c_src/jsonx_str.h | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/c_src/jsonx_str.h b/c_src/jsonx_str.h index 50dde5d..15a5d31 100644 --- a/c_src/jsonx_str.h +++ b/c_src/jsonx_str.h @@ -124,6 +124,35 @@ ucs_to_utf8(unsigned char* ptr, unsigned ucs){ return NULL; } +static inline int +is_surrogate_start(int val){ + return ((0xD800 <= val) && (val <= 0xDBFF)); +} + +static inline int +next_surrogate_end(unsigned char *str){ + unsigned char *src = str; + src+=4; + if(*src == '\\'){ + unsigned hval; + src++; + if(*src == 'u'){ + src++; + if(ucs_from_4hex(src, &hval)) { + if((0xDC00 <= hval) && (hval <= 0xDFFF)){ + return hval; + } + } + } + } + return 0; +} + +static inline int +parse_surrogate(int start, int end){ + return (((start - 0xD800) << 10) + (end - 0xDC00) + 0x0010000); +} + static inline int check_with_unescape_jstr(unsigned char *str, unsigned char **endstr, unsigned char **endptr){ unsigned char c, k; @@ -174,8 +203,14 @@ check_with_unescape_jstr(unsigned char *str, unsigned char **endstr, unsigned ch case '\\': {src++; *dst++ = 92U; continue;} case 'u': { unsigned hval; + src++; + if(!ucs_from_4hex(src, &hval)) {goto error;} + unsigned sur_end; + if(is_surrogate_start(hval) && (sur_end = next_surrogate_end(src))){ + hval = parse_surrogate(hval, sur_end); + src += 6; + } else {goto error;} src++; - if(!ucs_from_4hex(src, &hval)) {goto error;} if(!(dst = ucs_to_utf8(dst, hval))) {goto error;} src += 4; continue; From 7f8138b1484326f85da2c06978560eb32770298f Mon Sep 17 00:00:00 2001 From: Feotov Daniil Date: Thu, 10 Apr 2014 12:35:27 +0400 Subject: [PATCH 3/4] Fix error in parsing surrogate pairs --- c_src/jsonx_str.h | 1 - 1 file changed, 1 deletion(-) diff --git a/c_src/jsonx_str.h
b/c_src/jsonx_str.h index 15a5d31..88fc5cb 100644 --- a/c_src/jsonx_str.h +++ b/c_src/jsonx_str.h @@ -210,7 +210,6 @@ check_with_unescape_jstr(unsigned char *str, unsigned char **endstr, unsigned ch hval = parse_surrogate(hval, sur_end); src += 6; } else {goto error;} - src++; if(!(dst = ucs_to_utf8(dst, hval))) {goto error;} src += 4; continue; From 7044db6bebcae2bd4691f925f80b7687555c926f Mon Sep 17 00:00:00 2001 From: Feotov Daniil Date: Thu, 10 Apr 2014 14:16:55 +0400 Subject: [PATCH 4/4] Do not return error if not surrogate --- c_src/jsonx_str.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c_src/jsonx_str.h b/c_src/jsonx_str.h index 88fc5cb..4a7fd33 100644 --- a/c_src/jsonx_str.h +++ b/c_src/jsonx_str.h @@ -209,7 +209,7 @@ check_with_unescape_jstr(unsigned char *str, unsigned char **endstr, unsigned ch if(is_surrogate_start(hval) && (sur_end = next_surrogate_end(src))){ hval = parse_surrogate(hval, sur_end); src += 6; - } else {goto error;} + } if(!(dst = ucs_to_utf8(dst, hval))) {goto error;} src += 4; continue;