Skip to content

Commit

Permalink
Let json:decode/3 keep whitespaces
Browse files Browse the repository at this point in the history
`json:decode/3` always stripped leading whitespace from the `Rest` binary,
which could be problematic if the user expected it to be preserved.

E.g. `json:decode(<<"foo\n bar">>, ok, #{})` returned:
    `{<<"foo">>, ok, <<"bar">>}` instead of
    `{<<"foo">>, ok, <<"\n bar">>}`.

If `Rest` contains only whitespace, it is removed, so that the user
can match on the empty binary to know whether to continue the decoding loop.

E.g. `json:decode(<<"foo\n  ">>, ok, #{})` still returns:
    `{<<"foo">>, ok, <<>>}`
  • Loading branch information
dgud committed Sep 13, 2024
1 parent b6fe0ae commit ceb3c11
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 11 deletions.
12 changes: 7 additions & 5 deletions lib/stdlib/src/json.erl
Original file line number Diff line number Diff line change
Expand Up @@ -1386,16 +1386,18 @@ object_key(_, Original, Skip, Acc, Stack, Decode) ->

%% Hands a freshly decoded Value to the next step, chosen by the top of
%% the stack:
%%   - empty stack        -> the top-level value is complete; terminate
%%   - ?ARRAY on top      -> array_push
%%   - ?OBJECT on top     -> object_value
%%   - anything else      -> the top element is treated as a pending object
%%                           key and is popped before calling object_push
%%
%% NOTE(review): two `[]` clauses appear below, one calling terminate/5 and
%% one calling terminate/4. This looks like the removed and the added line
%% of a diff shown together; confirm which one belongs in the real file.
%% The terminate/4 call passes Rest twice, so terminate/4 receives the
%% untrimmed remainder as its second argument.
continue(<<Rest/bits>>, Original, Skip, Acc, Stack0, Decode, Value) ->
case Stack0 of
[] -> terminate(Rest, Original, Skip, Acc, Value);
[] -> terminate(Rest, Rest, Acc, Value);
[?ARRAY | _] -> array_push(Rest, Original, Skip, Acc, Stack0, Decode, Value);
[?OBJECT | _] -> object_value(Rest, Original, Skip, Acc, Stack0, Decode, Value);
[Key | Stack] -> object_push(Rest, Original, Skip, Acc, Stack, Decode, Value, Key)
end.

%% terminate/5: finishes decoding of a top-level value.
%% Consumes every leading whitespace byte (as defined by ?is_ws), bumping
%% Skip by one per byte, and returns {Value, Acc, Rest} where Rest is
%% whatever follows that whitespace. Leading whitespace is therefore never
%% present in the returned remainder.
terminate(<<Byte, Rest/bits>>, Original, Skip, Acc, Value) when ?is_ws(Byte) ->
terminate(Rest, Original, Skip + 1, Acc, Value);
%% First non-whitespace byte (or end of input): Original and Skip are
%% unused here; only the remaining bits are returned.
terminate(<<Rest/bits>>, _Original, _Skip, Acc, Value) ->
{Value, Acc, Rest}.
%% terminate/4: finishes decoding of a top-level value while preserving
%% leading whitespace in the returned remainder.
%%
%% The first argument is scanned past whitespace bytes (?is_ws); the second
%% argument, Original, is carried along unchanged so it can be returned.
%% Returns {Value, Acc, Remainder} where Remainder is:
%%   - <<>>      when nothing but whitespace was left, so callers can match
%%               on the empty binary to detect the end of input;
%%   - Original  when something other than whitespace follows, i.e. the
%%               whitespace that was scanned is NOT stripped.
terminate(<<Byte, Rest/bits>>, Original, Acc, Value) when ?is_ws(Byte) ->
terminate(Rest, Original, Acc, Value);
%% Only whitespace (or nothing) remained: report an empty remainder.
terminate(<<>>, _, Acc, Value) ->
{Value, Acc, <<>>};
%% Non-whitespace data follows: hand back the untouched remainder.
terminate(<<_/bits>>, Original, Acc, Value) ->
{Value, Acc, Original}.

-spec unexpected_utf8(binary(), non_neg_integer()) -> no_return().
unexpected_utf8(Original, Skip) when byte_size(Original) =:= Skip ->
Expand Down
27 changes: 21 additions & 6 deletions lib/stdlib/test/json_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@
property_escape_all/1
]).


-define(is_ws(X), X =:= $\s; X =:= $\t; X =:= $\r; X =:= $\n).

suite() ->
[
{ct_hooks, [ts_install_cth]},
Expand Down Expand Up @@ -646,7 +649,7 @@ test_decode_whitespace(_Config) ->

%% Add extra whitespace: replaces a single space in Str with the sequence
%% space, space, tab, CR, LF and returns the result as a binary.
%% NOTE(review): two replace calls appear below, one without and one with
%% the `all` direction argument. This looks like the removed and the added
%% line of a diff shown together. Per the string module documentation,
%% string:replace/3 defaults to `leading` (first occurrence only), whereas
%% `all` replaces every space; confirm against the stdlib docs.
ews(Str) ->
unicode:characters_to_binary(string:replace(Str, <<" ">>, <<" \s\t\r\n">>)).
unicode:characters_to_binary(string:replace(Str, <<" ">>, <<" \s\t\r\n">>, all)).

test_decode_api(_Config) ->
put(history, []),
Expand Down Expand Up @@ -757,10 +760,15 @@ test_decode_api_stream(_Config) ->
"numbers": [1, -10, 0.0, -0.0, 2.0, -2.0, 31e2, 31e-2, 0.31e2, -0.31e2, 0.13e-2],
"strings": ["three", "åäö", "mixed_Ω"],
"escaped": ["\\n", "\\u2603", "\\ud834\\uDD1E", "\\n\xc3\xb1"]
}#,
}
#,
ok = stream_decode(Types),

Multiple = ~#12345 1.30 "String1" -0.31e2\n["an array"]12345#,
{12345, ok, B1} = json:decode(ews(~# 12345 "foo" #), ok, #{}),
<<" \s\t\r\n", _/binary>> = B1,
{<<"foo">>, ok, <<>>} = json:decode(B1, ok, #{}),

Multiple = ~#12345 1.30 "String1" -0.31e2\n["an array"]12345\n#,
ok = multi_stream_decode(Multiple),
ok.

Expand Down Expand Up @@ -794,22 +802,29 @@ multi_stream_decode(Strs) ->
{R1, [], ContBin} ->
multi_stream_decode(ContBin);
Other ->
io:format("~p '~ts'~n~p~n", [R1,ContBin, Other]),
io:format("~p '~tp'~n~p~n", [R1,ContBin, Other]),
error
end.

%% byte_loop/1: starts a streaming decode with an empty initial chunk, an
%% empty list as accumulator and no decoder callbacks, asserts that the
%% decoder asks for more input, then feeds Bin through byte_loop/3.
byte_loop(Bin) ->
{continue, State} = json:decode_start(<<>>, [], #{}),
byte_loop(Bin, State, []).

%% byte_loop/3: feeds the input to json:decode_continue/2 one byte at a
%% time. Bytes accumulates the bytes already fed (in reverse order) and is
%% only referenced by the debug print below.
%%
%% NOTE(review): two function heads and two result branches appear in this
%% clause. This looks like the removed and the added lines of a diff shown
%% together; confirm which set belongs in the real file.
byte_loop(<<Byte, Rest/binary>>, State0, Bytes) ->
byte_loop(<<Byte, Rest/binary>> = Orig, State0, Bytes) ->
%% io:format("cont with '~s' ~p~n",[lists:reverse([Byte|Bytes]), State0]),
case json:decode_continue(<<Byte>>, State0) of
{continue, State} ->
byte_loop(Rest, State, [Byte|Bytes]);
{Result, [], <<>>} ->
%% The decoder only ever saw a single byte, so its own remainder is
%% empty. Reconstruct the remainder that a whole-binary decode would
%% have reported, so both results can be compared.
%% Variant 1: trim to match the binary in return value
{Result, [], string:trim(Rest, leading)}
%% Variant 2: keep leading whitespace in the remainder unless only
%% whitespace is left.
case string:trim(Rest, leading) of
<<>> ->
%% Nothing but whitespace remains: expect an empty remainder.
{Result, [], <<>>};
_ when ?is_ws(Byte) ->
%% The byte that completed the value was itself whitespace, so it
%% is part of the expected remainder: return Orig, which still
%% includes Byte.
{Result, [], Orig};
_ ->
%% Presumably Byte was the closing byte of the value (e.g. a quote
%% or bracket), so the remainder starts right after it.
{Result, [], Rest}
end
end;
%% Input exhausted without a completed value: signal end of input so the
%% decoder can finish or fail.
byte_loop(<<>>, State, _Bytes) ->
json:decode_continue(end_of_input, State).
Expand Down

0 comments on commit ceb3c11

Please sign in to comment.