diff --git a/src/command_encode.cc b/src/command_encode.cc index e0554fc7..1e436ab3 100644 --- a/src/command_encode.cc +++ b/src/command_encode.cc @@ -1,6 +1,7 @@ #include #include #include +#include #include #include // EXIT_SUCCESS @@ -33,18 +34,43 @@ auto sourcemeta::jsonschema::cli::encode( resolver(options, options.contains("h") || options.contains("http"))); const auto encoding{sourcemeta::jsonbinpack::load(schema)}; - const auto document{ - sourcemeta::jsontoolkit::from_file(options.at("").front())}; - - std::ofstream output_stream( - std::filesystem::weakly_canonical(options.at("").at(1)), - std::ios::binary); - output_stream.exceptions(std::ios_base::badbit); - sourcemeta::jsonbinpack::Encoder encoder{output_stream}; - encoder.write(document, encoding); - output_stream.flush(); - const auto size{output_stream.tellp()}; - output_stream.close(); - std::cerr << "size: " << size << " bytes\n"; + const std::filesystem::path document{options.at("").front()}; + if (document.extension() == ".jsonl") { + log_verbose(options) << "Interpreting input as JSONL: " + << std::filesystem::weakly_canonical(document).string() + << "\n"; + + auto stream{sourcemeta::jsontoolkit::read_file(document)}; + std::ofstream output_stream( + std::filesystem::weakly_canonical(options.at("").at(1)), + std::ios::binary); + output_stream.exceptions(std::ios_base::badbit); + sourcemeta::jsonbinpack::Encoder encoder{output_stream}; + std::size_t count{0}; + for (const auto &entry : sourcemeta::jsontoolkit::JSONL{stream}) { + log_verbose(options) << "Encoding entry #" << count << "\n"; + encoder.write(entry, encoding); + count += 1; + } + + output_stream.flush(); + const auto total_size{output_stream.tellp()}; + output_stream.close(); + std::cerr << "total size: " << total_size << " bytes\n"; + } else { + const auto entry{ + sourcemeta::jsontoolkit::from_file(options.at("").front())}; + std::ofstream output_stream( + std::filesystem::weakly_canonical(options.at("").at(1)), + std::ios::binary); + output_stream.exceptions(std::ios_base::badbit); + sourcemeta::jsonbinpack::Encoder encoder{output_stream}; + encoder.write(entry, encoding); + output_stream.flush(); + const auto size{output_stream.tellp()}; + output_stream.close(); + std::cerr << "size: " << size << " bytes\n"; + } + return EXIT_SUCCESS; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 416a02da..bd3c1e1a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -174,6 +174,8 @@ add_jsonschema_test_unix(canonicalize/fail_unknown_metaschema) # Encode add_jsonschema_test_unix(encode/pass_schema_less) +add_jsonschema_test_unix(encode/pass_schema_less_jsonl) +add_jsonschema_test_unix(encode/pass_schema_less_jsonl_verbose) add_jsonschema_test_unix(encode/fail_no_document) add_jsonschema_test_unix(encode/fail_no_output) diff --git a/test/encode/pass_schema_less_jsonl.sh b/test/encode/pass_schema_less_jsonl.sh new file mode 100755 index 00000000..c113c205 --- /dev/null +++ b/test/encode/pass_schema_less_jsonl.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +set -o errexit +set -o nounset + +TMP="$(mktemp -d)" +clean() { rm -rf "$TMP"; } +trap clean EXIT + +cat << 'EOF' > "$TMP/document.jsonl" +{ "count": 1 } +{ "count": 2 } +{ "count": 3 } +{ "count": 4 } +{ "count": 5 } +EOF + +"$1" encode "$TMP/document.jsonl" "$TMP/output.binpack" > "$TMP/output.txt" 2>&1 +xxd "$TMP/output.binpack" > "$TMP/output.hex" + +cat << 'EOF' > "$TMP/expected.txt" +00000000: 1306 636f 756e 7415 1300 091d 1300 0525 ..count........% +00000010: 1300 052d 1300 0535 ...-...5 +EOF + +cat << 'EOF' > "$TMP/expected-output.txt" +total size: 24 bytes +EOF + +diff "$TMP/expected.txt" "$TMP/output.hex" +diff "$TMP/output.txt" "$TMP/expected-output.txt" diff --git a/test/encode/pass_schema_less_jsonl_verbose.sh b/test/encode/pass_schema_less_jsonl_verbose.sh new file mode 100755 index 00000000..9b17ea1f --- /dev/null +++ b/test/encode/pass_schema_less_jsonl_verbose.sh @@ -0,0 +1,37 @@ +#!/bin/sh + +set -o errexit +set -o nounset + +TMP="$(mktemp -d)" +clean() { rm -rf "$TMP"; } +trap clean EXIT + +cat << 'EOF' > "$TMP/document.jsonl" +{ "count": 1 } +{ "count": 2 } +{ "count": 3 } +{ "count": 4 } +{ "count": 5 } +EOF + +"$1" encode "$TMP/document.jsonl" "$TMP/output.binpack" --verbose > "$TMP/output.txt" 2>&1 +xxd "$TMP/output.binpack" > "$TMP/output.hex" + +cat << 'EOF' > "$TMP/expected.txt" +00000000: 1306 636f 756e 7415 1300 091d 1300 0525 ..count........% +00000010: 1300 052d 1300 0535 ...-...5 +EOF + +cat << EOF > "$TMP/expected-output.txt" +Interpreting input as JSONL: $(realpath "$TMP")/document.jsonl +Encoding entry #0 +Encoding entry #1 +Encoding entry #2 +Encoding entry #3 +Encoding entry #4 +total size: 24 bytes +EOF + +diff "$TMP/expected.txt" "$TMP/output.hex" +diff "$TMP/output.txt" "$TMP/expected-output.txt" diff --git a/vendor/jsonbinpack/src/runtime/encoder_string.cc b/vendor/jsonbinpack/src/runtime/encoder_string.cc index 4ff6e798..a42aebe6 100644 --- a/vendor/jsonbinpack/src/runtime/encoder_string.cc +++ b/vendor/jsonbinpack/src/runtime/encoder_string.cc @@ -20,7 +20,7 @@ auto Encoder::FLOOR_VARINT_PREFIX_UTF8_STRING_SHARED( assert(document.is_string()); const sourcemeta::jsontoolkit::JSON::String value{document.to_string()}; const auto size{value.size()}; - assert(document.size() == size); + assert(document.byte_size() == size); const auto shared{this->cache_.find(value, Cache::Type::Standalone)}; // (1) Write 0x00 if shared, else do nothing