From d77d86a014b3778b74c735cfd6a55dbc26887951 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BD=D0=B4=D1=80=D0=B5=D0=B9=20=D0=91=D1=83=D0=B4?= =?UTF-8?q?=D0=B8=D0=BB=D0=BE=D0=B2=D1=81=D0=BA=D0=B8=D0=B9?= Date: Mon, 10 Feb 2025 19:28:55 +0000 Subject: [PATCH] - --- benchmarks/fuzzing.hpp | 4 +- benchmarks/rapid_parse_handler.hpp | 2 +- cmake/deps.cmake | 1 - gtest/parsers/test_oneof.cpp | 94 +++++++++++ include/tgbm/jsons/boostjson_dom_traits.hpp | 3 +- include/tgbm/jsons/parse_sax/common_api.hpp | 1 + .../jsons/parse_sax/discriminated_api.hpp | 45 ++++- include/tgbm/jsons/rapidjson_dom_traits.hpp | 3 +- include/tgbm/jsons/sax.hpp | 159 ++++++++++++++++-- include/tgbm/jsons/stream_parser.hpp | 1 + 10 files changed, 289 insertions(+), 24 deletions(-) diff --git a/benchmarks/fuzzing.hpp b/benchmarks/fuzzing.hpp index 4a66b9e6..b97a5aa3 100644 --- a/benchmarks/fuzzing.hpp +++ b/benchmarks/fuzzing.hpp @@ -212,7 +212,7 @@ struct Storage { file.exceptions(std::fstream::failbit | std::fstream::badbit); file.open(path); if (!file.is_open()) { - throw std::runtime_error("FUCK THIS SHIT"); + throw std::runtime_error(fmt::format("can't open file on path : {}", path.generic_string())); } file << *json; file.flush(); @@ -243,7 +243,7 @@ struct Storage { file.exceptions(std::fstream::failbit | std::fstream::badbit); file.open(path); if (!file.is_open()) { - throw std::runtime_error("FUCK THIS SHIT"); + throw std::runtime_error(fmt::format("can't open file on path : {}", path.generic_string())); } file << *json; file.flush(); diff --git a/benchmarks/rapid_parse_handler.hpp b/benchmarks/rapid_parse_handler.hpp index efa633ab..1ef49afa 100644 --- a/benchmarks/rapid_parse_handler.hpp +++ b/benchmarks/rapid_parse_handler.hpp @@ -211,7 +211,7 @@ T parse_handler(std::string_view sv) { details::rapid_sv_stream ss(sv); rapidjson::ParseResult parse_result = reader.Parse(ss, handler); if (parse_result.IsError()) { - throw std::runtime_error(fmt::format("Fail parse")); + throw_json_parse_error(); } return result; } diff --git a/cmake/deps.cmake b/cmake/deps.cmake index 005d0699..93439af6 100644 --- a/cmake/deps.cmake +++ b/cmake/deps.cmake @@ -17,7 +17,6 @@ CPMAddPackage( OPTIONS "BOOST_ENABLE_CMAKE ON" ) unset(BOOST_INCLUDE_LIBRARIES) -find_package(Boost 1.87 COMPONENTS system asio pfr json boost_container REQUIRED) CPMAddPackage("gh:fmtlib/fmt#11.0.2") diff --git a/gtest/parsers/test_oneof.cpp b/gtest/parsers/test_oneof.cpp index ee64e105..5b178857 100644 --- a/gtest/parsers/test_oneof.cpp +++ b/gtest/parsers/test_oneof.cpp @@ -281,4 +281,98 @@ JSON_PARSE_TEST(OneOfHiddenUserTypeMoreData, MessageOrigin) { EXPECT_EQ(parsed, expected); } +TEST(GeneratorBoost, MissplaceDiscriminator) { + MessageOrigin expected{ + .data = + MessageOriginChannel{ + .date = tgbm::api::Integer{1630454400}, + .sender_channel = + tgbm::box{ + Channel{ + .id = tgbm::api::Integer{13579}, + .name = "News Channel", + }, + }, + }, + }; + + auto json = R"( + { + "date": 1630454400, + "sender_channel": { + "id": 13579, + "name": "News Channel" + }, + "type": "channel" + } + )"; + + MessageOrigin parsed = GeneratorBoost::parse_json(json); + EXPECT_EQ(parsed, expected); +} + +TEST(GeneratorBoost, ArrayMissplaceDiscriminator) { + std::vector expected{ + MessageOrigin{ + .data = + MessageOriginChannel{ + .date = tgbm::api::Integer{1630454400}, + .sender_channel = + tgbm::box{ + Channel{ + .id = tgbm::api::Integer{13579}, + .name = "News Channel", + }, + }, + }, + }, + MessageOrigin{ + .data = + MessageOriginChat{ + .date = tgbm::api::Integer{1630454400}, + .sender_chat = + tgbm::box{ + Chat{ + .id = tgbm::api::Integer{67890}, + .title = "Group Chat", + }, + }, + }, + }, + }; + + auto json = R"( +[ + { + "date":1630454400, + "type":"channel", + "sender_channel":{ + "id":13579, + "name":"News Channel" + } + }, + { + "date":1630454400, + "sender_chat":{ + "id":67890, + "title":"Group Chat" + }, + "type":"chat" + } +])"; + + auto parsed = GeneratorBoost::parse_json>(json); + EXPECT_EQ(parsed, expected); +} + +JSON_PARSE_TEST(MissingField, MessageOrigin) { + MessageOrigin expected; + auto json = R"( +{ + "date": 1630454400 +} + )"; + EXPECT_THROW(parse_json(json), tgbm::json::parse_error); +} + } // namespace test_oneof diff --git a/include/tgbm/jsons/boostjson_dom_traits.hpp b/include/tgbm/jsons/boostjson_dom_traits.hpp index 6b02baaf..87f991c4 100644 --- a/include/tgbm/jsons/boostjson_dom_traits.hpp +++ b/include/tgbm/jsons/boostjson_dom_traits.hpp @@ -3,6 +3,7 @@ #include #include +#include #include namespace tgbm::json { @@ -64,7 +65,7 @@ struct dom_traits_for<::boost::json::value> { } static void on_error() { - throw std::runtime_error("JSON Error"); + throw_json_parse_error(); } static const type* find_field(const type& json, std::string_view key) { diff --git a/include/tgbm/jsons/parse_sax/common_api.hpp b/include/tgbm/jsons/parse_sax/common_api.hpp index 06ef5108..8b12c04e 100644 --- a/include/tgbm/jsons/parse_sax/common_api.hpp +++ b/include/tgbm/jsons/parse_sax/common_api.hpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace tgbm::json { diff --git a/include/tgbm/jsons/parse_sax/discriminated_api.hpp b/include/tgbm/jsons/parse_sax/discriminated_api.hpp index 7448bebb..61016c05 100644 --- a/include/tgbm/jsons/parse_sax/discriminated_api.hpp +++ b/include/tgbm/jsons/parse_sax/discriminated_api.hpp @@ -20,20 +20,57 @@ struct sax_parser { return v.discriminate(key, emplacer); } - static sax_consumer_t parse(T& v, sax_token& tok, dd::with_stack_resource r) { + static sax_consumer_t bufferized_parse(T& out, sax_token& tok, dd::with_stack_resource r) { + using enum sax_token::kind_e; + + assert(tok.got != key || tok.str_m != T::discriminator); + + std::vector buf_toks; + buf_toks.reserve(10); + { + sax_token token; + token.got = object_begin; + buf_toks.emplace_back(token); + } + + do { + buf_toks.emplace_back(tok); + co_yield {}; + } while (tok.got != key || tok.str_m != T::discriminator); + + co_yield {}; + tok.expect(string); + + auto gen_suboneof = get_generator_suboneof(tok.str_m, out, tok, r); + auto it = gen_suboneof.cur_iterator(); + for (auto& buf_tok : buf_toks) { + assert(it != gen_suboneof.end()); + tok = buf_tok.to_view(); + ++it; + } + + if (it != gen_suboneof.end()) { + co_yield {}; + co_yield dd::elements_of(gen_suboneof); + } + } + + static sax_consumer_t parse(T& out, sax_token& tok, dd::with_stack_resource r) { using enum sax_token::kind_e; tok.expect(object_begin); co_yield {}; if (tok.got == object_end) co_return; tok.expect(key); - if (tok.got != key || tok.str_m != T::discriminator) [[unlikely]] - json::throw_json_parse_error(); + if (tok.got != key || tok.str_m != T::discriminator) [[unlikely]] { + co_yield dd::elements_of(bufferized_parse(out, tok, r)); + co_return; + } co_yield {}; tok.expect(string); // change 'got' before generator creation (may be function returning generator) tok.got = object_begin; - co_yield dd::elements_of(get_generator_suboneof(tok.str_m, v, tok, r)); + co_yield dd::elements_of(get_generator_suboneof(tok.str_m, out, tok, r)); } }; diff --git a/include/tgbm/jsons/rapidjson_dom_traits.hpp b/include/tgbm/jsons/rapidjson_dom_traits.hpp index b9f4a46c..c2107086 100644 --- a/include/tgbm/jsons/rapidjson_dom_traits.hpp +++ b/include/tgbm/jsons/rapidjson_dom_traits.hpp @@ -9,6 +9,7 @@ #include #include +#include namespace tgbm::json { @@ -75,7 +76,7 @@ struct dom_traits_for>> { } static void on_error() { - throw std::runtime_error("JSON Error"); + throw_json_parse_error(); } static const type* find_field(const type& json, std::string_view key) { diff --git a/include/tgbm/jsons/sax.hpp b/include/tgbm/jsons/sax.hpp index 103c7d37..4d74c40d 100644 --- a/include/tgbm/jsons/sax.hpp +++ b/include/tgbm/jsons/sax.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -8,19 +9,6 @@ namespace tgbm::json { struct sax_token { - union { - double double_m; - int64_t int_m; - uint64_t uint_m; - std::string_view str_m; - bool bool_m; - }; - - sax_token() { - } - ~sax_token() { - } - enum kind_e : uint16_t { array_begin = 1 << 0, array_end = 1 << 1, @@ -36,15 +24,158 @@ struct sax_token { part = 1 << 11, }; - static constexpr std::int16_t all = ~0; + union { + double double_m; + int64_t int_m; + uint64_t uint_m; + std::string_view str_m; + bool bool_m; + }; + kind_e got; + sax_token() : got(object_begin) { + } + ~sax_token() { + } + void expect(std::int16_t tokens) const { if (!(got & tokens)) TGBM_JSON_PARSE_ERROR; } }; +struct sax_token_value { + union { + double double_m; + int64_t int_m; + uint64_t uint_m; + std::string str_m; + bool bool_m; + }; + + sax_token_value(sax_token token) { + using enum sax_token::kind_e; + got = token.got; + switch (got) { + case string: + case key: + case part: + std::construct_at(&str_m, token.str_m); + case array_begin: + case array_end: + case object_begin: + case object_end: + case null: + return; + case int64: + int_m = token.int_m; + return; + case uint64: + uint_m = token.int_m; + return; + case double_: + double_m = token.double_m; + return; + case bool_: + bool_m = token.bool_m; + return; + } + } + + sax_token_value() : got(sax_token::object_begin) { + } + sax_token_value(const sax_token_value&) = delete; + + sax_token_value(sax_token_value&& rhs) noexcept { + *this = std::move(rhs); + } + + sax_token_value& operator=(sax_token_value&& rhs) noexcept { + destroy(); + got = rhs.got; + if (rhs.is_string(rhs.got)) { + std::construct_at(&str_m, std::move(rhs.str_m)); + } else { + std::memcpy(this, &rhs, sizeof(*this)); + } + return *this; + } + + sax_token to_view() const noexcept { + using enum sax_token::kind_e; + sax_token token; + token.got = got; + switch (got) { + case string: + case key: + case part: + token.str_m = str_m; + case array_begin: + case array_end: + case object_begin: + case object_end: + case null: + break; + case int64: + token.int_m = int_m; + break; + case uint64: + token.uint_m = int_m; + break; + case double_: + token.double_m = double_m; + break; + case bool_: + token.bool_m = bool_m; + break; + } + + return token; + } + + bool is_string(sax_token::kind_e got) const noexcept { + using enum sax_token::kind_e; + switch (got) { + case string: + case key: + case part: + return true; + case array_begin: + case array_end: + case object_begin: + case object_end: + case int64: + case uint64: + case double_: + case bool_: + case null: + return false; + } + unreachable(); + } + + ~sax_token_value() { + destroy(); + } + + static constexpr std::int16_t all = ~0; + sax_token::kind_e got; + + void expect(std::int16_t tokens) const { + if (!(got & tokens)) + TGBM_JSON_PARSE_ERROR; + } + + private: + void destroy() noexcept { + if (is_string(got)) { + got = sax_token::object_begin; + std::destroy_at(&str_m); + } + } +}; + using sax_consumer_t = dd::generator; template diff --git a/include/tgbm/jsons/stream_parser.hpp b/include/tgbm/jsons/stream_parser.hpp index 87c04262..82fe0720 100644 --- a/include/tgbm/jsons/stream_parser.hpp +++ b/include/tgbm/jsons/stream_parser.hpp @@ -27,6 +27,7 @@ struct stream_parser { p.write_some(!end, data.data(), data.size(), ec); if (ec) [[unlikely]] return; + // The json output has ended, but the generator is still expecting more tokens if (end && !p.handler().is_done()) [[unlikely]] ec = ::boost::json::error::extra_data; }