From 724d6a18cf0311a50220d10f698ffbaa4640b4f6 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 9 Oct 2024 15:54:23 -0400 Subject: [PATCH] [WIP] Support encoding and decoding JSONL datasets Signed-off-by: Juan Cruz Viotti --- src/command_encode.cc | 52 ++++++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/src/command_encode.cc b/src/command_encode.cc index e0554fc7..b3a7332e 100644 --- a/src/command_encode.cc +++ b/src/command_encode.cc @@ -1,6 +1,7 @@ #include #include #include +#include #include #include // EXIT_SUCCESS @@ -33,18 +34,43 @@ auto sourcemeta::jsonschema::cli::encode( resolver(options, options.contains("h") || options.contains("http"))); const auto encoding{sourcemeta::jsonbinpack::load(schema)}; - const auto document{ - sourcemeta::jsontoolkit::from_file(options.at("").front())}; - - std::ofstream output_stream( - std::filesystem::weakly_canonical(options.at("").at(1)), - std::ios::binary); - output_stream.exceptions(std::ios_base::badbit); - sourcemeta::jsonbinpack::Encoder encoder{output_stream}; - encoder.write(document, encoding); - output_stream.flush(); - const auto size{output_stream.tellp()}; - output_stream.close(); - std::cerr << "size: " << size << " bytes\n"; + const std::filesystem::path document{options.at("").front()}; + if (document.extension() == ".jsonl") { + log_verbose(options) << "Interpreting input as JSONL: " + << std::filesystem::weakly_canonical(document).string() + << "\n"; + + auto stream{sourcemeta::jsontoolkit::read_file(document)}; + std::ofstream output_stream( + std::filesystem::weakly_canonical(options.at("").at(1)), + std::ios::binary); + output_stream.exceptions(std::ios_base::badbit); + sourcemeta::jsonbinpack::Encoder encoder{output_stream}; + std::size_t count{0}; + for (const auto &entry : sourcemeta::jsontoolkit::JSONL{stream}) { + log_verbose(options) << "Processing entry #" << count << "\n"; + encoder.write(entry, encoding); + count += 1; + } + + output_stream.flush(); + const auto size{output_stream.tellp()}; + output_stream.close(); + std::cerr << "size: " << size << " bytes\n"; + } else { + const auto entry{ + sourcemeta::jsontoolkit::from_file(options.at("").front())}; + std::ofstream output_stream( + std::filesystem::weakly_canonical(options.at("").at(1)), + std::ios::binary); + output_stream.exceptions(std::ios_base::badbit); + sourcemeta::jsonbinpack::Encoder encoder{output_stream}; + encoder.write(entry, encoding); + output_stream.flush(); + const auto size{output_stream.tellp()}; + output_stream.close(); + std::cerr << "size: " << size << " bytes\n"; + } + return EXIT_SUCCESS; }