From c142d08ea81bae32959fcecc0fac3d69caf04dec Mon Sep 17 00:00:00 2001 From: Dan Gudmundsson Date: Fri, 13 Sep 2024 11:32:43 +0200 Subject: [PATCH] Let json:decode/3 keep whitespaces json:decode/3 always stripped leading whitespace in the Rest binary, which could be problematic if the user expected it to be preserved. E.g., json:decode(<<"foo\n bar">>, ok, #{}) returned: {<<"foo">>, ok, <<"bar">>} instead of {<<"foo">>, ok, <<"\n bar">>}. If Rest contains only whitespace, it is still removed, so that the user can match on an empty binary to know whether they should continue the decoding loop. E.g., json:decode(<<"foo\n ">>, ok, #{}) still returns: {<<"foo">>, ok, <<>>} --- lib/stdlib/src/json.erl | 12 +++++++----- lib/stdlib/test/json_SUITE.erl | 27 +++++++++++++++++++++------ 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/lib/stdlib/src/json.erl b/lib/stdlib/src/json.erl index 04a20148576c..979f9446f952 100644 --- a/lib/stdlib/src/json.erl +++ b/lib/stdlib/src/json.erl @@ -1386,16 +1386,18 @@ object_key(_, Original, Skip, Acc, Stack, Decode) -> continue(<>, Original, Skip, Acc, Stack0, Decode, Value) -> case Stack0 of - [] -> terminate(Rest, Original, Skip, Acc, Value); + [] -> terminate(Rest, Rest, Acc, Value); [?ARRAY | _] -> array_push(Rest, Original, Skip, Acc, Stack0, Decode, Value); [?OBJECT | _] -> object_value(Rest, Original, Skip, Acc, Stack0, Decode, Value); [Key | Stack] -> object_push(Rest, Original, Skip, Acc, Stack, Decode, Value, Key) end. -terminate(<>, Original, Skip, Acc, Value) when ?is_ws(Byte) -> - terminate(Rest, Original, Skip + 1, Acc, Value); -terminate(<>, _Original, _Skip, Acc, Value) -> - {Value, Acc, Rest}. +terminate(<>, Original, Acc, Value) when ?is_ws(Byte) -> + terminate(Rest, Original, Acc, Value); +terminate(<<>>, _, Acc, Value) -> + {Value, Acc, <<>>}; +terminate(_, Original, Acc, Value) -> + {Value, Acc, Original}. -spec unexpected_utf8(binary(), non_neg_integer()) -> no_return(). 
unexpected_utf8(Original, Skip) when byte_size(Original) =:= Skip -> diff --git a/lib/stdlib/test/json_SUITE.erl b/lib/stdlib/test/json_SUITE.erl index c27b66792052..3ac56f1ed284 100644 --- a/lib/stdlib/test/json_SUITE.erl +++ b/lib/stdlib/test/json_SUITE.erl @@ -58,6 +58,9 @@ property_escape_all/1 ]). + +-define(is_ws(X), X =:= $\s; X =:= $\t; X =:= $\r; X =:= $\n). + suite() -> [ {ct_hooks, [ts_install_cth]}, @@ -646,7 +649,7 @@ test_decode_whitespace(_Config) -> %% add extra whitespace ews(Str) -> - unicode:characters_to_binary(string:replace(Str, <<" ">>, <<" \s\t\r\n">>)). + unicode:characters_to_binary(string:replace(Str, <<" ">>, <<" \s\t\r\n">>, all)). test_decode_api(_Config) -> put(history, []), @@ -757,10 +760,15 @@ test_decode_api_stream(_Config) -> "numbers": [1, -10, 0.0, -0.0, 2.0, -2.0, 31e2, 31e-2, 0.31e2, -0.31e2, 0.13e-2], "strings": ["three", "åäö", "mixed_Ω"], "escaped": ["\\n", "\\u2603", "\\ud834\\uDD1E", "\\n\xc3\xb1"] - }#, + } + #, ok = stream_decode(Types), - Multiple = ~#12345 1.30 "String1" -0.31e2\n["an array"]12345#, + {12345, ok, B1} = json:decode(ews(~# 12345 "foo" #), ok, #{}), + <<" \s\t\r\n", _/binary>> = B1, + {<<"foo">>, ok, <<>>} = json:decode(B1, ok, #{}), + + Multiple = ~#12345 1.30 "String1" -0.31e2\n["an array"]12345\n#, ok = multi_stream_decode(Multiple), ok. @@ -794,7 +802,7 @@ multi_stream_decode(Strs) -> {R1, [], ContBin} -> multi_stream_decode(ContBin); Other -> - io:format("~p '~ts'~n~p~n", [R1,ContBin, Other]), + io:format("~p '~tp'~n~p~n", [R1,ContBin, Other]), error end. @@ -802,14 +810,21 @@ byte_loop(Bin) -> {continue, State} = json:decode_start(<<>>, [], #{}), byte_loop(Bin, State, []). 
-byte_loop(<>, State0, Bytes) -> +byte_loop(<> = Orig, State0, Bytes) -> %% io:format("cont with '~s' ~p~n",[lists:reverse([Byte|Bytes]), State0]), case json:decode_continue(<>, State0) of {continue, State} -> byte_loop(Rest, State, [Byte|Bytes]); {Result, [], <<>>} -> %% trim to match the binary in return value - {Result, [], string:trim(Rest, leading)} + case string:trim(Rest, leading) of + <<>> -> + {Result, [], <<>>}; + _ when ?is_ws(Byte) -> + {Result, [], Orig}; + _ -> + {Result, [], Rest} + end end; byte_loop(<<>>, State, _Bytes) -> json:decode_continue(end_of_input, State).