From b8c9581ad98f097ca762f6a4ce9c650299f77cb4 Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sat, 9 Mar 2024 17:51:04 +0100 Subject: [PATCH 01/15] Make some space for another crate in the workspace --- .gitignore | 2 +- Cargo.toml | 56 ++----------------- serde_avro_fast/Cargo.toml | 51 +++++++++++++++++ .../object_container_file_encoding.rs | 0 .../benches}/single.rs | 0 .../src}/de/deserializer/allowed_depth.rs | 0 .../src}/de/deserializer/mod.rs | 0 .../src}/de/deserializer/types/blocks.rs | 0 .../src}/de/deserializer/types/boolean.rs | 0 .../src}/de/deserializer/types/decimal.rs | 0 .../de/deserializer/types/discriminant.rs | 0 .../src}/de/deserializer/types/duration.rs | 0 .../src}/de/deserializer/types/enums.rs | 0 .../de/deserializer/types/length_delimited.rs | 0 .../src}/de/deserializer/types/mod.rs | 0 .../src}/de/deserializer/types/record.rs | 0 .../src}/de/deserializer/types/union.rs | 0 .../deserializer/unit_variant_enum_access.rs | 0 {src => serde_avro_fast/src}/de/error.rs | 0 {src => serde_avro_fast/src}/de/mod.rs | 0 {src => serde_avro_fast/src}/de/read/mod.rs | 0 {src => serde_avro_fast/src}/de/read/take.rs | 0 {src => serde_avro_fast/src}/lib.rs | 0 .../object_container_file_encoding/mod.rs | 0 .../reader/decompression.rs | 0 .../reader/mod.rs | 0 .../writer/compression.rs | 0 .../writer/mod.rs | 0 .../writer/vectored_write_polyfill.rs | 0 {src => serde_avro_fast/src}/schema/error.rs | 0 {src => serde_avro_fast/src}/schema/mod.rs | 0 .../src}/schema/safe/canonical_form.rs | 0 .../src}/schema/safe/check_for_cycles.rs | 0 .../src}/schema/safe/mod.rs | 0 .../src}/schema/safe/parsing/mod.rs | 0 .../src}/schema/safe/parsing/raw.rs | 0 .../src}/schema/safe/rabin.rs | 0 .../src}/schema/safe/serialize.rs | 0 .../src}/schema/self_referential.rs | 0 .../schema/union_variants_per_type_lookup.rs | 0 {src => serde_avro_fast/src}/ser/error.rs | 0 {src => serde_avro_fast/src}/ser/mod.rs | 0 .../src}/ser/serializer/blocks.rs | 0 
.../src}/ser/serializer/decimal.rs | 0 .../ser/serializer/extract_for_duration.rs | 0 .../src}/ser/serializer/mod.rs | 0 .../src}/ser/serializer/seq_or_tuple.rs | 0 .../src}/ser/serializer/struct_or_map.rs | 0 .../src}/single_object_encoding.rs | 0 {tests => serde_avro_fast/tests}/duration.rs | 0 .../tests}/from_benches.rs | 0 .../max_depth_prevents_stack_overflow.rs | 0 .../tests}/no_cyclic_debug_on_schema.rs | 0 .../tests}/object_container_file_encoding.rs | 0 .../tests}/round_trips.rs | 0 {tests => serde_avro_fast/tests}/schema.rs | 0 .../tests}/schema_construction.rs | 0 .../tests}/single_object_encoding.rs | 0 {tests => serde_avro_fast/tests}/unions.rs | 0 59 files changed, 57 insertions(+), 52 deletions(-) create mode 100644 serde_avro_fast/Cargo.toml rename {benches => serde_avro_fast/benches}/object_container_file_encoding.rs (100%) rename {benches => serde_avro_fast/benches}/single.rs (100%) rename {src => serde_avro_fast/src}/de/deserializer/allowed_depth.rs (100%) rename {src => serde_avro_fast/src}/de/deserializer/mod.rs (100%) rename {src => serde_avro_fast/src}/de/deserializer/types/blocks.rs (100%) rename {src => serde_avro_fast/src}/de/deserializer/types/boolean.rs (100%) rename {src => serde_avro_fast/src}/de/deserializer/types/decimal.rs (100%) rename {src => serde_avro_fast/src}/de/deserializer/types/discriminant.rs (100%) rename {src => serde_avro_fast/src}/de/deserializer/types/duration.rs (100%) rename {src => serde_avro_fast/src}/de/deserializer/types/enums.rs (100%) rename {src => serde_avro_fast/src}/de/deserializer/types/length_delimited.rs (100%) rename {src => serde_avro_fast/src}/de/deserializer/types/mod.rs (100%) rename {src => serde_avro_fast/src}/de/deserializer/types/record.rs (100%) rename {src => serde_avro_fast/src}/de/deserializer/types/union.rs (100%) rename {src => serde_avro_fast/src}/de/deserializer/unit_variant_enum_access.rs (100%) rename {src => serde_avro_fast/src}/de/error.rs (100%) rename {src => 
serde_avro_fast/src}/de/mod.rs (100%) rename {src => serde_avro_fast/src}/de/read/mod.rs (100%) rename {src => serde_avro_fast/src}/de/read/take.rs (100%) rename {src => serde_avro_fast/src}/lib.rs (100%) rename {src => serde_avro_fast/src}/object_container_file_encoding/mod.rs (100%) rename {src => serde_avro_fast/src}/object_container_file_encoding/reader/decompression.rs (100%) rename {src => serde_avro_fast/src}/object_container_file_encoding/reader/mod.rs (100%) rename {src => serde_avro_fast/src}/object_container_file_encoding/writer/compression.rs (100%) rename {src => serde_avro_fast/src}/object_container_file_encoding/writer/mod.rs (100%) rename {src => serde_avro_fast/src}/object_container_file_encoding/writer/vectored_write_polyfill.rs (100%) rename {src => serde_avro_fast/src}/schema/error.rs (100%) rename {src => serde_avro_fast/src}/schema/mod.rs (100%) rename {src => serde_avro_fast/src}/schema/safe/canonical_form.rs (100%) rename {src => serde_avro_fast/src}/schema/safe/check_for_cycles.rs (100%) rename {src => serde_avro_fast/src}/schema/safe/mod.rs (100%) rename {src => serde_avro_fast/src}/schema/safe/parsing/mod.rs (100%) rename {src => serde_avro_fast/src}/schema/safe/parsing/raw.rs (100%) rename {src => serde_avro_fast/src}/schema/safe/rabin.rs (100%) rename {src => serde_avro_fast/src}/schema/safe/serialize.rs (100%) rename {src => serde_avro_fast/src}/schema/self_referential.rs (100%) rename {src => serde_avro_fast/src}/schema/union_variants_per_type_lookup.rs (100%) rename {src => serde_avro_fast/src}/ser/error.rs (100%) rename {src => serde_avro_fast/src}/ser/mod.rs (100%) rename {src => serde_avro_fast/src}/ser/serializer/blocks.rs (100%) rename {src => serde_avro_fast/src}/ser/serializer/decimal.rs (100%) rename {src => serde_avro_fast/src}/ser/serializer/extract_for_duration.rs (100%) rename {src => serde_avro_fast/src}/ser/serializer/mod.rs (100%) rename {src => serde_avro_fast/src}/ser/serializer/seq_or_tuple.rs (100%) rename {src => 
serde_avro_fast/src}/ser/serializer/struct_or_map.rs (100%) rename {src => serde_avro_fast/src}/single_object_encoding.rs (100%) rename {tests => serde_avro_fast/tests}/duration.rs (100%) rename {tests => serde_avro_fast/tests}/from_benches.rs (100%) rename {tests => serde_avro_fast/tests}/max_depth_prevents_stack_overflow.rs (100%) rename {tests => serde_avro_fast/tests}/no_cyclic_debug_on_schema.rs (100%) rename {tests => serde_avro_fast/tests}/object_container_file_encoding.rs (100%) rename {tests => serde_avro_fast/tests}/round_trips.rs (100%) rename {tests => serde_avro_fast/tests}/schema.rs (100%) rename {tests => serde_avro_fast/tests}/schema_construction.rs (100%) rename {tests => serde_avro_fast/tests}/single_object_encoding.rs (100%) rename {tests => serde_avro_fast/tests}/unions.rs (100%) diff --git a/.gitignore b/.gitignore index 4fffb2f..96ef6c0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ /target -/Cargo.lock +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml index 77b8cc4..b55d20e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,51 +1,5 @@ -[package] - authors = ["Thomas BESSOU "] - description = "An idiomatic implementation of serde/avro (de)serialization" - edition = "2021" - license = "LGPL-3.0-only" - name = "serde_avro_fast" - repository = "https://github.com/Ten0/serde_avro_fast" - version = "1.0.0-rc.4" - -[features] - default = ["deflate"] - deflate = ["flate2"] - snappy = ["snap", "crc32fast"] - xz = ["xz2"] - zstandard = ["zstd"] - -[dependencies] - bzip2 = { version = "0.4", optional = true } - crc32fast = { version = "1", optional = true } - flate2 = { version = "1", optional = true } - integer-encoding = { default-features = false, version = "4" } - num-traits = "0.2" - rand = "0.8" - rust_decimal = { version = "1", default-features = false, features = ["std", "serde-with-str"] } - serde = "1" - serde-transcode = "1" - serde_derive = "1" - serde_json = "1" - serde_serializer_quick_unsupported = "0.1" - snap = { version = "1", 
optional = true } - thiserror = "1" - xz2 = { version = "0.1", optional = true } - zstd = { version = "0.13", optional = true } - -[dev-dependencies] - anyhow = "1" - apache-avro = { version = "0.14", features = ["bzip", "snappy", "xz", "zstandard"] } - criterion = "0.5" - lazy_static = "1" - paste = "1" - pretty_assertions = "1" - serde-tuple-vec-map = "1" - serde_bytes = "0.11" - -[[bench]] - harness = false - name = "single" - -[[bench]] - harness = false - name = "object_container_file_encoding" +[workspace] + members = [ + "serde_avro_fast", + ] + resolver = "2" diff --git a/serde_avro_fast/Cargo.toml b/serde_avro_fast/Cargo.toml new file mode 100644 index 0000000..77b8cc4 --- /dev/null +++ b/serde_avro_fast/Cargo.toml @@ -0,0 +1,51 @@ +[package] + authors = ["Thomas BESSOU "] + description = "An idiomatic implementation of serde/avro (de)serialization" + edition = "2021" + license = "LGPL-3.0-only" + name = "serde_avro_fast" + repository = "https://github.com/Ten0/serde_avro_fast" + version = "1.0.0-rc.4" + +[features] + default = ["deflate"] + deflate = ["flate2"] + snappy = ["snap", "crc32fast"] + xz = ["xz2"] + zstandard = ["zstd"] + +[dependencies] + bzip2 = { version = "0.4", optional = true } + crc32fast = { version = "1", optional = true } + flate2 = { version = "1", optional = true } + integer-encoding = { default-features = false, version = "4" } + num-traits = "0.2" + rand = "0.8" + rust_decimal = { version = "1", default-features = false, features = ["std", "serde-with-str"] } + serde = "1" + serde-transcode = "1" + serde_derive = "1" + serde_json = "1" + serde_serializer_quick_unsupported = "0.1" + snap = { version = "1", optional = true } + thiserror = "1" + xz2 = { version = "0.1", optional = true } + zstd = { version = "0.13", optional = true } + +[dev-dependencies] + anyhow = "1" + apache-avro = { version = "0.14", features = ["bzip", "snappy", "xz", "zstandard"] } + criterion = "0.5" + lazy_static = "1" + paste = "1" + pretty_assertions = "1" 
+ serde-tuple-vec-map = "1" + serde_bytes = "0.11" + +[[bench]] + harness = false + name = "single" + +[[bench]] + harness = false + name = "object_container_file_encoding" diff --git a/benches/object_container_file_encoding.rs b/serde_avro_fast/benches/object_container_file_encoding.rs similarity index 100% rename from benches/object_container_file_encoding.rs rename to serde_avro_fast/benches/object_container_file_encoding.rs diff --git a/benches/single.rs b/serde_avro_fast/benches/single.rs similarity index 100% rename from benches/single.rs rename to serde_avro_fast/benches/single.rs diff --git a/src/de/deserializer/allowed_depth.rs b/serde_avro_fast/src/de/deserializer/allowed_depth.rs similarity index 100% rename from src/de/deserializer/allowed_depth.rs rename to serde_avro_fast/src/de/deserializer/allowed_depth.rs diff --git a/src/de/deserializer/mod.rs b/serde_avro_fast/src/de/deserializer/mod.rs similarity index 100% rename from src/de/deserializer/mod.rs rename to serde_avro_fast/src/de/deserializer/mod.rs diff --git a/src/de/deserializer/types/blocks.rs b/serde_avro_fast/src/de/deserializer/types/blocks.rs similarity index 100% rename from src/de/deserializer/types/blocks.rs rename to serde_avro_fast/src/de/deserializer/types/blocks.rs diff --git a/src/de/deserializer/types/boolean.rs b/serde_avro_fast/src/de/deserializer/types/boolean.rs similarity index 100% rename from src/de/deserializer/types/boolean.rs rename to serde_avro_fast/src/de/deserializer/types/boolean.rs diff --git a/src/de/deserializer/types/decimal.rs b/serde_avro_fast/src/de/deserializer/types/decimal.rs similarity index 100% rename from src/de/deserializer/types/decimal.rs rename to serde_avro_fast/src/de/deserializer/types/decimal.rs diff --git a/src/de/deserializer/types/discriminant.rs b/serde_avro_fast/src/de/deserializer/types/discriminant.rs similarity index 100% rename from src/de/deserializer/types/discriminant.rs rename to 
serde_avro_fast/src/de/deserializer/types/discriminant.rs diff --git a/src/de/deserializer/types/duration.rs b/serde_avro_fast/src/de/deserializer/types/duration.rs similarity index 100% rename from src/de/deserializer/types/duration.rs rename to serde_avro_fast/src/de/deserializer/types/duration.rs diff --git a/src/de/deserializer/types/enums.rs b/serde_avro_fast/src/de/deserializer/types/enums.rs similarity index 100% rename from src/de/deserializer/types/enums.rs rename to serde_avro_fast/src/de/deserializer/types/enums.rs diff --git a/src/de/deserializer/types/length_delimited.rs b/serde_avro_fast/src/de/deserializer/types/length_delimited.rs similarity index 100% rename from src/de/deserializer/types/length_delimited.rs rename to serde_avro_fast/src/de/deserializer/types/length_delimited.rs diff --git a/src/de/deserializer/types/mod.rs b/serde_avro_fast/src/de/deserializer/types/mod.rs similarity index 100% rename from src/de/deserializer/types/mod.rs rename to serde_avro_fast/src/de/deserializer/types/mod.rs diff --git a/src/de/deserializer/types/record.rs b/serde_avro_fast/src/de/deserializer/types/record.rs similarity index 100% rename from src/de/deserializer/types/record.rs rename to serde_avro_fast/src/de/deserializer/types/record.rs diff --git a/src/de/deserializer/types/union.rs b/serde_avro_fast/src/de/deserializer/types/union.rs similarity index 100% rename from src/de/deserializer/types/union.rs rename to serde_avro_fast/src/de/deserializer/types/union.rs diff --git a/src/de/deserializer/unit_variant_enum_access.rs b/serde_avro_fast/src/de/deserializer/unit_variant_enum_access.rs similarity index 100% rename from src/de/deserializer/unit_variant_enum_access.rs rename to serde_avro_fast/src/de/deserializer/unit_variant_enum_access.rs diff --git a/src/de/error.rs b/serde_avro_fast/src/de/error.rs similarity index 100% rename from src/de/error.rs rename to serde_avro_fast/src/de/error.rs diff --git a/src/de/mod.rs b/serde_avro_fast/src/de/mod.rs 
similarity index 100% rename from src/de/mod.rs rename to serde_avro_fast/src/de/mod.rs diff --git a/src/de/read/mod.rs b/serde_avro_fast/src/de/read/mod.rs similarity index 100% rename from src/de/read/mod.rs rename to serde_avro_fast/src/de/read/mod.rs diff --git a/src/de/read/take.rs b/serde_avro_fast/src/de/read/take.rs similarity index 100% rename from src/de/read/take.rs rename to serde_avro_fast/src/de/read/take.rs diff --git a/src/lib.rs b/serde_avro_fast/src/lib.rs similarity index 100% rename from src/lib.rs rename to serde_avro_fast/src/lib.rs diff --git a/src/object_container_file_encoding/mod.rs b/serde_avro_fast/src/object_container_file_encoding/mod.rs similarity index 100% rename from src/object_container_file_encoding/mod.rs rename to serde_avro_fast/src/object_container_file_encoding/mod.rs diff --git a/src/object_container_file_encoding/reader/decompression.rs b/serde_avro_fast/src/object_container_file_encoding/reader/decompression.rs similarity index 100% rename from src/object_container_file_encoding/reader/decompression.rs rename to serde_avro_fast/src/object_container_file_encoding/reader/decompression.rs diff --git a/src/object_container_file_encoding/reader/mod.rs b/serde_avro_fast/src/object_container_file_encoding/reader/mod.rs similarity index 100% rename from src/object_container_file_encoding/reader/mod.rs rename to serde_avro_fast/src/object_container_file_encoding/reader/mod.rs diff --git a/src/object_container_file_encoding/writer/compression.rs b/serde_avro_fast/src/object_container_file_encoding/writer/compression.rs similarity index 100% rename from src/object_container_file_encoding/writer/compression.rs rename to serde_avro_fast/src/object_container_file_encoding/writer/compression.rs diff --git a/src/object_container_file_encoding/writer/mod.rs b/serde_avro_fast/src/object_container_file_encoding/writer/mod.rs similarity index 100% rename from src/object_container_file_encoding/writer/mod.rs rename to 
serde_avro_fast/src/object_container_file_encoding/writer/mod.rs diff --git a/src/object_container_file_encoding/writer/vectored_write_polyfill.rs b/serde_avro_fast/src/object_container_file_encoding/writer/vectored_write_polyfill.rs similarity index 100% rename from src/object_container_file_encoding/writer/vectored_write_polyfill.rs rename to serde_avro_fast/src/object_container_file_encoding/writer/vectored_write_polyfill.rs diff --git a/src/schema/error.rs b/serde_avro_fast/src/schema/error.rs similarity index 100% rename from src/schema/error.rs rename to serde_avro_fast/src/schema/error.rs diff --git a/src/schema/mod.rs b/serde_avro_fast/src/schema/mod.rs similarity index 100% rename from src/schema/mod.rs rename to serde_avro_fast/src/schema/mod.rs diff --git a/src/schema/safe/canonical_form.rs b/serde_avro_fast/src/schema/safe/canonical_form.rs similarity index 100% rename from src/schema/safe/canonical_form.rs rename to serde_avro_fast/src/schema/safe/canonical_form.rs diff --git a/src/schema/safe/check_for_cycles.rs b/serde_avro_fast/src/schema/safe/check_for_cycles.rs similarity index 100% rename from src/schema/safe/check_for_cycles.rs rename to serde_avro_fast/src/schema/safe/check_for_cycles.rs diff --git a/src/schema/safe/mod.rs b/serde_avro_fast/src/schema/safe/mod.rs similarity index 100% rename from src/schema/safe/mod.rs rename to serde_avro_fast/src/schema/safe/mod.rs diff --git a/src/schema/safe/parsing/mod.rs b/serde_avro_fast/src/schema/safe/parsing/mod.rs similarity index 100% rename from src/schema/safe/parsing/mod.rs rename to serde_avro_fast/src/schema/safe/parsing/mod.rs diff --git a/src/schema/safe/parsing/raw.rs b/serde_avro_fast/src/schema/safe/parsing/raw.rs similarity index 100% rename from src/schema/safe/parsing/raw.rs rename to serde_avro_fast/src/schema/safe/parsing/raw.rs diff --git a/src/schema/safe/rabin.rs b/serde_avro_fast/src/schema/safe/rabin.rs similarity index 100% rename from src/schema/safe/rabin.rs rename to 
serde_avro_fast/src/schema/safe/rabin.rs diff --git a/src/schema/safe/serialize.rs b/serde_avro_fast/src/schema/safe/serialize.rs similarity index 100% rename from src/schema/safe/serialize.rs rename to serde_avro_fast/src/schema/safe/serialize.rs diff --git a/src/schema/self_referential.rs b/serde_avro_fast/src/schema/self_referential.rs similarity index 100% rename from src/schema/self_referential.rs rename to serde_avro_fast/src/schema/self_referential.rs diff --git a/src/schema/union_variants_per_type_lookup.rs b/serde_avro_fast/src/schema/union_variants_per_type_lookup.rs similarity index 100% rename from src/schema/union_variants_per_type_lookup.rs rename to serde_avro_fast/src/schema/union_variants_per_type_lookup.rs diff --git a/src/ser/error.rs b/serde_avro_fast/src/ser/error.rs similarity index 100% rename from src/ser/error.rs rename to serde_avro_fast/src/ser/error.rs diff --git a/src/ser/mod.rs b/serde_avro_fast/src/ser/mod.rs similarity index 100% rename from src/ser/mod.rs rename to serde_avro_fast/src/ser/mod.rs diff --git a/src/ser/serializer/blocks.rs b/serde_avro_fast/src/ser/serializer/blocks.rs similarity index 100% rename from src/ser/serializer/blocks.rs rename to serde_avro_fast/src/ser/serializer/blocks.rs diff --git a/src/ser/serializer/decimal.rs b/serde_avro_fast/src/ser/serializer/decimal.rs similarity index 100% rename from src/ser/serializer/decimal.rs rename to serde_avro_fast/src/ser/serializer/decimal.rs diff --git a/src/ser/serializer/extract_for_duration.rs b/serde_avro_fast/src/ser/serializer/extract_for_duration.rs similarity index 100% rename from src/ser/serializer/extract_for_duration.rs rename to serde_avro_fast/src/ser/serializer/extract_for_duration.rs diff --git a/src/ser/serializer/mod.rs b/serde_avro_fast/src/ser/serializer/mod.rs similarity index 100% rename from src/ser/serializer/mod.rs rename to serde_avro_fast/src/ser/serializer/mod.rs diff --git a/src/ser/serializer/seq_or_tuple.rs 
b/serde_avro_fast/src/ser/serializer/seq_or_tuple.rs similarity index 100% rename from src/ser/serializer/seq_or_tuple.rs rename to serde_avro_fast/src/ser/serializer/seq_or_tuple.rs diff --git a/src/ser/serializer/struct_or_map.rs b/serde_avro_fast/src/ser/serializer/struct_or_map.rs similarity index 100% rename from src/ser/serializer/struct_or_map.rs rename to serde_avro_fast/src/ser/serializer/struct_or_map.rs diff --git a/src/single_object_encoding.rs b/serde_avro_fast/src/single_object_encoding.rs similarity index 100% rename from src/single_object_encoding.rs rename to serde_avro_fast/src/single_object_encoding.rs diff --git a/tests/duration.rs b/serde_avro_fast/tests/duration.rs similarity index 100% rename from tests/duration.rs rename to serde_avro_fast/tests/duration.rs diff --git a/tests/from_benches.rs b/serde_avro_fast/tests/from_benches.rs similarity index 100% rename from tests/from_benches.rs rename to serde_avro_fast/tests/from_benches.rs diff --git a/tests/max_depth_prevents_stack_overflow.rs b/serde_avro_fast/tests/max_depth_prevents_stack_overflow.rs similarity index 100% rename from tests/max_depth_prevents_stack_overflow.rs rename to serde_avro_fast/tests/max_depth_prevents_stack_overflow.rs diff --git a/tests/no_cyclic_debug_on_schema.rs b/serde_avro_fast/tests/no_cyclic_debug_on_schema.rs similarity index 100% rename from tests/no_cyclic_debug_on_schema.rs rename to serde_avro_fast/tests/no_cyclic_debug_on_schema.rs diff --git a/tests/object_container_file_encoding.rs b/serde_avro_fast/tests/object_container_file_encoding.rs similarity index 100% rename from tests/object_container_file_encoding.rs rename to serde_avro_fast/tests/object_container_file_encoding.rs diff --git a/tests/round_trips.rs b/serde_avro_fast/tests/round_trips.rs similarity index 100% rename from tests/round_trips.rs rename to serde_avro_fast/tests/round_trips.rs diff --git a/tests/schema.rs b/serde_avro_fast/tests/schema.rs similarity index 100% rename from 
tests/schema.rs rename to serde_avro_fast/tests/schema.rs diff --git a/tests/schema_construction.rs b/serde_avro_fast/tests/schema_construction.rs similarity index 100% rename from tests/schema_construction.rs rename to serde_avro_fast/tests/schema_construction.rs diff --git a/tests/single_object_encoding.rs b/serde_avro_fast/tests/single_object_encoding.rs similarity index 100% rename from tests/single_object_encoding.rs rename to serde_avro_fast/tests/single_object_encoding.rs diff --git a/tests/unions.rs b/serde_avro_fast/tests/unions.rs similarity index 100% rename from tests/unions.rs rename to serde_avro_fast/tests/unions.rs From a2cae4864f5b163bb6eafa0a2ad66c5804acea43 Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sat, 9 Mar 2024 17:56:42 +0100 Subject: [PATCH 02/15] Link workspace --- serde_avro_fast/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/serde_avro_fast/Cargo.toml b/serde_avro_fast/Cargo.toml index 77b8cc4..a555dc7 100644 --- a/serde_avro_fast/Cargo.toml +++ b/serde_avro_fast/Cargo.toml @@ -6,6 +6,7 @@ name = "serde_avro_fast" repository = "https://github.com/Ten0/serde_avro_fast" version = "1.0.0-rc.4" + workspace = ".." 
[features] default = ["deflate"] From 66a9234681e278c97085f8beda2b5406f8898de7 Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sat, 9 Mar 2024 20:08:11 +0100 Subject: [PATCH 03/15] implement the derive macro --- Cargo.toml | 1 + serde_avro_derive/Cargo.toml | 22 ++++ serde_avro_derive/src/lib.rs | 14 +++ serde_avro_derive/src/schema.rs | 83 +++++++++++++++ serde_avro_derive/tests/basic.rs | 97 +++++++++++++++++ serde_avro_fast/src/schema/builder.rs | 144 ++++++++++++++++++++++++++ serde_avro_fast/src/schema/mod.rs | 6 +- 7 files changed, 366 insertions(+), 1 deletion(-) create mode 100644 serde_avro_derive/Cargo.toml create mode 100644 serde_avro_derive/src/lib.rs create mode 100644 serde_avro_derive/src/schema.rs create mode 100644 serde_avro_derive/tests/basic.rs create mode 100644 serde_avro_fast/src/schema/builder.rs diff --git a/Cargo.toml b/Cargo.toml index b55d20e..5d97f36 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,6 @@ [workspace] members = [ + "serde_avro_derive", "serde_avro_fast", ] resolver = "2" diff --git a/serde_avro_derive/Cargo.toml b/serde_avro_derive/Cargo.toml new file mode 100644 index 0000000..5f58d2d --- /dev/null +++ b/serde_avro_derive/Cargo.toml @@ -0,0 +1,22 @@ +[package] + authors = ["Thomas BESSOU "] + description = "Derive avro schema for Rust structs for serde_avro_fast" + edition = "2021" + license = "LGPL-3.0-only" + name = "serde_avro_derive" + repository = "https://github.com/Ten0/serde_avro_fast" + version = "0.1.0" + workspace = ".." 
+ +[lib] + proc-macro = true + +[dependencies] + darling = "0.20" + proc-macro2 = "1" + quote = "1" + syn = { version = "2", features = ["visit-mut"] } + +[dev-dependencies] + serde_avro_fast = { path = "../serde_avro_fast" } + serde_json = "1" diff --git a/serde_avro_derive/src/lib.rs b/serde_avro_derive/src/lib.rs new file mode 100644 index 0000000..123f907 --- /dev/null +++ b/serde_avro_derive/src/lib.rs @@ -0,0 +1,14 @@ +mod schema; + +use darling::FromDeriveInput; + +#[proc_macro_derive(Schema, attributes(avro_schema))] +pub fn schema_derive(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let derive_input = syn::parse_macro_input!(input as syn::DeriveInput); + + match FromDeriveInput::from_derive_input(&derive_input).map(schema::schema_impl) { + Err(e) => e.write_errors().into(), + Ok(Ok(tokens)) => tokens.into(), + Ok(Err(e)) => e.into_compile_error().into(), + } +} diff --git a/serde_avro_derive/src/schema.rs b/serde_avro_derive/src/schema.rs new file mode 100644 index 0000000..649c265 --- /dev/null +++ b/serde_avro_derive/src/schema.rs @@ -0,0 +1,83 @@ +use { + proc_macro2::{Span, TokenStream}, + quote::{format_ident, quote}, + syn::{ + visit_mut::{self, VisitMut}, + Error, + }, +}; + +#[derive(darling::FromDeriveInput)] +#[darling(attributes(avro_schema), supports(struct_named))] +pub(crate) struct SchemaDeriveInput { + pub(super) ident: proc_macro2::Ident, + pub(super) data: darling::ast::Data<(), SchemaDeriveField>, + pub(super) generics: syn::Generics, +} + +#[derive(darling::FromField)] +#[darling(attributes(avro_schema))] +pub(crate) struct SchemaDeriveField { + pub(super) ident: Option, + pub(super) ty: syn::Type, +} + +pub(crate) fn schema_impl(input: SchemaDeriveInput) -> Result { + let fields = input + .data + .take_struct() + .expect("Supports directive should prevent enums"); + + let ident = &input.ident; + let struct_name = ident.to_string(); + let (impl_generics, ty_generics, where_clause) = input.generics.split_for_impl(); + + 
let field_names = fields + .iter() + .map(|f| f.ident.as_ref().map(|i| i.to_string())) + .collect::>>() + .ok_or_else(|| Error::new(Span::call_site(), "Unnamed fields are not supported"))?; + let field_types = fields.iter().map(|f| &f.ty); + + let mut generics_staticified = input.generics.clone(); + TurnLifetimesToStatic.visit_generics_mut(&mut generics_staticified); + let (_, ty_generics_staticified, _) = generics_staticified.split_for_impl(); + + Ok(quote! { + const _: () = { + use serde_avro_fast::schema::{self, builder}; + + impl #impl_generics builder::BuildSchemaInner for #ident #ty_generics #where_clause { + fn build(builder: &mut builder::SchemaBuilder) -> schema::SchemaKey { + let reserved_schema_key = builder.reserve(); + let mut struct_name = module_path!().replace("::", "."); + struct_name.push('.'); + struct_name.push_str(#struct_name); + let new_node = schema::SchemaNode::RegularType(schema::RegularType::Record( + schema::Record::new( + schema::Name::from_fully_qualified_name(struct_name), + vec![#( + schema::RecordField::new( + #field_names, + builder::node_idx::<#field_types>(builder), + ), + )*], + ), + )); + builder.nodes[reserved_schema_key.idx()] = new_node; + reserved_schema_key + } + + type TypeLookup = #ident #ty_generics_staticified; + } + }; + }) +} + +struct TurnLifetimesToStatic; +impl VisitMut for TurnLifetimesToStatic { + fn visit_lifetime_mut(&mut self, i: &mut syn::Lifetime) { + i.ident = format_ident!("static"); + visit_mut::visit_lifetime_mut(self, i) + } +} diff --git a/serde_avro_derive/tests/basic.rs b/serde_avro_derive/tests/basic.rs new file mode 100644 index 0000000..5719b66 --- /dev/null +++ b/serde_avro_derive/tests/basic.rs @@ -0,0 +1,97 @@ +use serde_avro_fast::schema::BuildSchema; + +#[derive(serde_avro_derive::Schema)] +#[allow(unused)] +struct Primitives { + a: i32, + b: String, +} + +#[derive(serde_avro_derive::Schema)] +struct SubStruct { + #[allow(unused)] + primitives: Primitives, +} + 
+#[derive(serde_avro_derive::Schema)] +#[allow(unused)] +struct TopStruct { + s1: SubStruct, + s2: SubStruct, + vec: Vec, +} + +fn test(expected: &str) { + let schema = serde_json::to_string_pretty(&T::schema_mut()).unwrap(); + println!("{schema}"); + assert_eq!(schema, expected); +} + +#[test] +fn primitives() { + test::( + r#"{ + "type": "record", + "name": "basic.Primitives", + "fields": [ + { + "name": "a", + "type": "int" + }, + { + "name": "b", + "type": "string" + } + ] +}"#, + ); +} + +#[test] +fn substruct_and_vec() { + test::( + r#"{ + "type": "record", + "name": "basic.TopStruct", + "fields": [ + { + "name": "s1", + "type": { + "type": "record", + "name": "SubStruct", + "fields": [ + { + "name": "primitives", + "type": { + "type": "record", + "name": "Primitives", + "fields": [ + { + "name": "a", + "type": "int" + }, + { + "name": "b", + "type": "string" + } + ] + } + } + ] + } + }, + { + "name": "s2", + "type": "SubStruct" + }, + { + "name": "vec", + "type": { + "type": "array", + "items": "string" + } + } + ] +}"#, + ); +} diff --git a/serde_avro_fast/src/schema/builder.rs b/serde_avro_fast/src/schema/builder.rs new file mode 100644 index 0000000..71c7532 --- /dev/null +++ b/serde_avro_fast/src/schema/builder.rs @@ -0,0 +1,144 @@ +use std::{any::TypeId, collections::HashMap}; + +use crate::schema::*; + +pub trait BuildSchema { + fn schema() -> Schema { + Self::schema_mut() + .try_into() + .expect("Schema derive generated invalid schema") + } + fn schema_mut() -> SchemaMut; +} +impl BuildSchema for T { + fn schema_mut() -> SchemaMut { + let mut builder = SchemaBuilder::default(); + assert_eq!(T::build(&mut builder).idx(), 0); + SchemaMut::from_nodes(builder.nodes) + } +} + +#[derive(Default)] +pub struct SchemaBuilder { + pub nodes: Vec, + pub already_built: HashMap, +} + +impl SchemaBuilder { + pub fn reserve(&mut self) -> SchemaKey { + let idx = self.nodes.len(); + self.nodes.push(SchemaNode::RegularType(RegularType::Null)); + SchemaKey::from_idx(idx) 
+ } +} + +pub trait BuildSchemaInner { + fn build(builder: &mut SchemaBuilder) -> SchemaKey; + type TypeLookup: std::any::Any; +} + +pub fn node_idx(builder: &mut SchemaBuilder) -> SchemaKey { + match builder.already_built.entry(TypeId::of::()) { + std::collections::hash_map::Entry::Occupied(entry) => *entry.get(), + std::collections::hash_map::Entry::Vacant(entry) => { + let expected_idx = SchemaKey::from_idx(builder.nodes.len()); + entry.insert(expected_idx); + let idx = T::build(builder); + assert_eq!(idx, expected_idx); + idx + } + } +} + +macro_rules! impl_primitive { + ($ty:ty, $variant:ident; $($rest:tt)*) => { + impl_primitive!($ty, Self, $variant; $($rest)*); + }; + ($ty:ty, $type_id_of: ty, $variant:ident; $($rest:tt)*) => { + impl BuildSchemaInner for $ty { + fn build(builder: &mut SchemaBuilder) -> SchemaKey { + let schema_key = SchemaKey::from_idx(builder.nodes.len()); + builder.nodes.push(SchemaNode::RegularType(RegularType::$variant)); + schema_key + } + type TypeLookup = $type_id_of; + } + impl_primitive!($($rest)*); + }; + () => {}; +} +impl_primitive!( + (), Null; + bool, Boolean; + i32, Int; + i64, Long; + f32, Float; + f64, Double; + String, String; + &'_ str, String, String; + Vec, Bytes; + &'_ [u8], Vec, Bytes; +); + +impl BuildSchemaInner for Vec { + fn build(builder: &mut SchemaBuilder) -> SchemaKey { + let reserved_schema_key = builder.reserve(); + let new_node = + SchemaNode::RegularType(RegularType::Array(Array::new(node_idx::(builder)))); + builder.nodes[reserved_schema_key.idx()] = new_node; + reserved_schema_key + } + + type TypeLookup = Vec; +} + +impl BuildSchemaInner for &'_ [T] { + fn build(builder: &mut SchemaBuilder) -> SchemaKey { + as BuildSchemaInner>::build(builder) + } + type TypeLookup = as BuildSchemaInner>::TypeLookup; +} + +impl BuildSchemaInner for Option { + fn build(builder: &mut SchemaBuilder) -> SchemaKey { + let reserved_schema_key = builder.reserve(); + let new_node = 
SchemaNode::RegularType(RegularType::Union(Union::new(vec![ + node_idx::<()>(builder), + node_idx::(builder), + ]))); + builder.nodes[reserved_schema_key.idx()] = new_node; + reserved_schema_key + } + + type TypeLookup = Option; +} + +impl BuildSchemaInner for [u8; N] { + fn build(builder: &mut SchemaBuilder) -> SchemaKey { + let schema_key = SchemaKey::from_idx(builder.nodes.len()); + builder + .nodes + .push(SchemaNode::RegularType(RegularType::Fixed(Fixed::new( + Name::from_fully_qualified_name(format!("u8_array_{}", N)), + N, + )))); + schema_key + } + type TypeLookup = Self; +} + +impl BuildSchemaInner for HashMap { + fn build(builder: &mut SchemaBuilder) -> SchemaKey { + let reserved_schema_key = builder.reserve(); + let new_node = SchemaNode::RegularType(RegularType::Map(Map::new(node_idx::(builder)))); + builder.nodes[reserved_schema_key.idx()] = new_node; + reserved_schema_key + } + type TypeLookup = HashMap; +} +impl BuildSchemaInner for std::collections::BTreeMap { + fn build(builder: &mut SchemaBuilder) -> SchemaKey { + as BuildSchemaInner>::build(builder) + } + type TypeLookup = as BuildSchemaInner>::TypeLookup; +} diff --git a/serde_avro_fast/src/schema/mod.rs b/serde_avro_fast/src/schema/mod.rs index 6f5fe87..5d31ddc 100644 --- a/serde_avro_fast/src/schema/mod.rs +++ b/serde_avro_fast/src/schema/mod.rs @@ -1,11 +1,15 @@ //! 
Navigate, modify and initialize the [`Schema`] +// For now I'll consider this not stable so not public API +#[doc(hidden)] +pub mod builder; + mod error; mod safe; pub(crate) mod self_referential; mod union_variants_per_type_lookup; -pub use {error::SchemaError, safe::*, self_referential::Schema}; +pub use {builder::BuildSchema, error::SchemaError, safe::*, self_referential::Schema}; pub(crate) use union_variants_per_type_lookup::UnionVariantLookupKey; From d67c8f1986e04b50d0754e736c9326842f562e90 Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sat, 9 Mar 2024 21:22:30 +0100 Subject: [PATCH 04/15] Add documentation and do some cleaning --- serde_avro_derive/Cargo.toml | 1 + serde_avro_derive/src/lib.rs | 50 ++++++++++++++++++++ serde_avro_derive/tests/basic.rs | 67 ++++++++++++++++++++------- serde_avro_fast/src/schema/builder.rs | 46 ++++++++++++------ 4 files changed, 131 insertions(+), 33 deletions(-) diff --git a/serde_avro_derive/Cargo.toml b/serde_avro_derive/Cargo.toml index 5f58d2d..467afd2 100644 --- a/serde_avro_derive/Cargo.toml +++ b/serde_avro_derive/Cargo.toml @@ -18,5 +18,6 @@ syn = { version = "2", features = ["visit-mut"] } [dev-dependencies] + pretty_assertions = "1" serde_avro_fast = { path = "../serde_avro_fast" } serde_json = "1" diff --git a/serde_avro_derive/src/lib.rs b/serde_avro_derive/src/lib.rs index 123f907..8a9407b 100644 --- a/serde_avro_derive/src/lib.rs +++ b/serde_avro_derive/src/lib.rs @@ -1,8 +1,58 @@ +//! Derive the ability to build an Avro schema for a type +//! +//! See [the macro's documentation](schema_derive). 
+ mod schema; use darling::FromDeriveInput; #[proc_macro_derive(Schema, attributes(avro_schema))] +/// Derive the ability to build an Avro schema for a type +/// +/// # Example +/// ``` +/// #[derive(serde_avro_derive::Schema)] +/// struct Foo { +/// primitives: Bar, +/// } +/// +/// #[derive(serde_avro_derive::Schema)] +/// #[allow(unused)] +/// struct Bar { +/// a: i32, +/// b: String, +/// } +/// +/// // The [`serde_avro_fast::schema::BuildSchema`] implementation will +/// // generate the following schema: +/// +/// let schema = r#"{ +/// "type": "record", +/// "name": "rust_out.Foo", +/// "fields": [ +/// { +/// "name": "primitives", +/// "type": { +/// "type": "record", +/// "name": "Bar", +/// "fields": [ +/// { +/// "name": "a", +/// "type": "int" +/// }, +/// { +/// "name": "b", +/// "type": "string" +/// } +/// ] +/// } +/// } +/// ] +/// }"#; +/// # use serde_avro_fast::schema::BuildSchema; +/// # let actual_schema = serde_json::to_string_pretty(&Foo::schema_mut()).unwrap(); +/// # assert_eq!(actual_schema, schema); +/// ``` pub fn schema_derive(input: proc_macro::TokenStream) -> proc_macro::TokenStream { let derive_input = syn::parse_macro_input!(input as syn::DeriveInput); diff --git a/serde_avro_derive/tests/basic.rs b/serde_avro_derive/tests/basic.rs index 5719b66..4005eb7 100644 --- a/serde_avro_derive/tests/basic.rs +++ b/serde_avro_derive/tests/basic.rs @@ -1,23 +1,25 @@ use serde_avro_fast::schema::BuildSchema; +use pretty_assertions::assert_eq; + #[derive(serde_avro_derive::Schema)] -#[allow(unused)] -struct Primitives { - a: i32, - b: String, +struct Foo { + #[allow(unused)] + primitives: Bar, } #[derive(serde_avro_derive::Schema)] -struct SubStruct { - #[allow(unused)] - primitives: Primitives, +#[allow(unused)] +struct Bar { + a: i32, + b: String, } #[derive(serde_avro_derive::Schema)] #[allow(unused)] -struct TopStruct { - s1: SubStruct, - s2: SubStruct, +struct Complex { + s1: Foo, + s2: Foo, vec: Vec, } @@ -29,10 +31,10 @@ fn test(expected: 
&str) { #[test] fn primitives() { - test::( + test::( r#"{ "type": "record", - "name": "basic.Primitives", + "name": "basic.Bar", "fields": [ { "name": "a", @@ -48,23 +50,52 @@ fn primitives() { } #[test] -fn substruct_and_vec() { - test::( +fn substruct() { + test::( + r#"{ + "type": "record", + "name": "basic.Foo", + "fields": [ + { + "name": "primitives", + "type": { + "type": "record", + "name": "Bar", + "fields": [ + { + "name": "a", + "type": "int" + }, + { + "name": "b", + "type": "string" + } + ] + } + } + ] +}"#, + ); +} + +#[test] +fn complex() { + test::( r#"{ "type": "record", - "name": "basic.TopStruct", + "name": "basic.Complex", "fields": [ { "name": "s1", "type": { "type": "record", - "name": "SubStruct", + "name": "Foo", "fields": [ { "name": "primitives", "type": { "type": "record", - "name": "Primitives", + "name": "Bar", "fields": [ { "name": "a", @@ -82,7 +113,7 @@ fn substruct_and_vec() { }, { "name": "s2", - "type": "SubStruct" + "type": "Foo" }, { "name": "vec", diff --git a/serde_avro_fast/src/schema/builder.rs b/serde_avro_fast/src/schema/builder.rs index 71c7532..962fef9 100644 --- a/serde_avro_fast/src/schema/builder.rs +++ b/serde_avro_fast/src/schema/builder.rs @@ -2,6 +2,10 @@ use std::{any::TypeId, collections::HashMap}; use crate::schema::*; +/// We can automatically build a schema for this type (can be `derive`d) +/// +/// This trait can be derived using `#[derive(Schema)]` from the +/// [`serde_avro_derive`](https://docs.rs/serde_avro_derive/) crate pub trait BuildSchema { fn schema() -> Schema { Self::schema_mut() @@ -51,21 +55,18 @@ pub fn node_idx(builder: &mut SchemaBuilder) -> SchemaKey { } macro_rules! 
impl_primitive { - ($ty:ty, $variant:ident; $($rest:tt)*) => { - impl_primitive!($ty, Self, $variant; $($rest)*); - }; - ($ty:ty, $type_id_of: ty, $variant:ident; $($rest:tt)*) => { - impl BuildSchemaInner for $ty { - fn build(builder: &mut SchemaBuilder) -> SchemaKey { - let schema_key = SchemaKey::from_idx(builder.nodes.len()); - builder.nodes.push(SchemaNode::RegularType(RegularType::$variant)); - schema_key + ($($ty:ty, $variant:ident;)+) => { + $( + impl BuildSchemaInner for $ty { + fn build(builder: &mut SchemaBuilder) -> SchemaKey { + let schema_key = SchemaKey::from_idx(builder.nodes.len()); + builder.nodes.push(SchemaNode::RegularType(RegularType::$variant)); + schema_key + } + type TypeLookup = Self; } - type TypeLookup = $type_id_of; - } - impl_primitive!($($rest)*); + )* }; - () => {}; } impl_primitive!( (), Null; @@ -75,11 +76,26 @@ impl_primitive!( f32, Float; f64, Double; String, String; - &'_ str, String, String; Vec, Bytes; - &'_ [u8], Vec, Bytes; ); +macro_rules! delegate_impl { + ($($ty:ty, $to:ty;)+) => { + $( + impl BuildSchemaInner for $ty { + fn build(builder: &mut SchemaBuilder) -> SchemaKey { + <$to as BuildSchemaInner>::build(builder) + } + type TypeLookup = <$to as BuildSchemaInner>::TypeLookup; + } + )* + }; +} +delegate_impl! 
{ + &'_ str, String; + &'_ [u8], Vec; +} + impl BuildSchemaInner for Vec { fn build(builder: &mut SchemaBuilder) -> SchemaKey { let reserved_schema_key = builder.reserve(); From caf09d051e7b497a07dad47f89018f2959af4cbc Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sat, 9 Mar 2024 23:09:10 +0100 Subject: [PATCH 05/15] put the schema builder traits & utilities in a separate crate --- Cargo.toml | 1 + serde_avro_derive/Cargo.toml | 11 +- serde_avro_derive/src/lib.rs | 224 +++++++++++++----- serde_avro_derive/tests/basic.rs | 2 +- serde_avro_derive_macros/Cargo.toml | 22 ++ serde_avro_derive_macros/src/lib.rs | 64 +++++ .../src/schema.rs | 8 +- serde_avro_fast/src/schema/builder.rs | 160 ------------- serde_avro_fast/src/schema/mod.rs | 6 +- 9 files changed, 262 insertions(+), 236 deletions(-) create mode 100644 serde_avro_derive_macros/Cargo.toml create mode 100644 serde_avro_derive_macros/src/lib.rs rename {serde_avro_derive => serde_avro_derive_macros}/src/schema.rs (88%) delete mode 100644 serde_avro_fast/src/schema/builder.rs diff --git a/Cargo.toml b/Cargo.toml index 5d97f36..d2ce5e1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,7 @@ [workspace] members = [ "serde_avro_derive", + "serde_avro_derive_macros", "serde_avro_fast", ] resolver = "2" diff --git a/serde_avro_derive/Cargo.toml b/serde_avro_derive/Cargo.toml index 467afd2..1049015 100644 --- a/serde_avro_derive/Cargo.toml +++ b/serde_avro_derive/Cargo.toml @@ -8,16 +8,11 @@ version = "0.1.0" workspace = ".." 
-[lib] - proc-macro = true - [dependencies] - darling = "0.20" - proc-macro2 = "1" - quote = "1" - syn = { version = "2", features = ["visit-mut"] } + serde_avro_derive_macros = { path = "../serde_avro_derive_macros" } + serde_avro_fast = { path = "../serde_avro_fast" } [dev-dependencies] pretty_assertions = "1" - serde_avro_fast = { path = "../serde_avro_fast" } + serde_avro_derive = { path = "../serde_avro_derive" } serde_json = "1" diff --git a/serde_avro_derive/src/lib.rs b/serde_avro_derive/src/lib.rs index 8a9407b..df50f88 100644 --- a/serde_avro_derive/src/lib.rs +++ b/serde_avro_derive/src/lib.rs @@ -1,64 +1,172 @@ -//! Derive the ability to build an Avro schema for a type -//! -//! See [the macro's documentation](schema_derive). +pub use serde_avro_fast; -mod schema; +pub use serde_avro_derive_macros::*; -use darling::FromDeriveInput; +use std::{any::TypeId, collections::HashMap}; -#[proc_macro_derive(Schema, attributes(avro_schema))] -/// Derive the ability to build an Avro schema for a type -/// -/// # Example -/// ``` -/// #[derive(serde_avro_derive::Schema)] -/// struct Foo { -/// primitives: Bar, -/// } -/// -/// #[derive(serde_avro_derive::Schema)] -/// #[allow(unused)] -/// struct Bar { -/// a: i32, -/// b: String, -/// } -/// -/// // The [`serde_avro_fast::schema::BuildSchema`] implementation will -/// // generate the following schema: +use serde_avro_fast::schema::*; + +/// We can automatically build a schema for this type (can be `derive`d) /// -/// let schema = r#"{ -/// "type": "record", -/// "name": "rust_out.Foo", -/// "fields": [ -/// { -/// "name": "primitives", -/// "type": { -/// "type": "record", -/// "name": "Bar", -/// "fields": [ -/// { -/// "name": "a", -/// "type": "int" -/// }, -/// { -/// "name": "b", -/// "type": "string" -/// } -/// ] -/// } -/// } -/// ] -/// }"#; -/// # use serde_avro_fast::schema::BuildSchema; -/// # let actual_schema = serde_json::to_string_pretty(&Foo::schema_mut()).unwrap(); -/// # 
assert_eq!(actual_schema, schema); -/// ``` -pub fn schema_derive(input: proc_macro::TokenStream) -> proc_macro::TokenStream { - let derive_input = syn::parse_macro_input!(input as syn::DeriveInput); - - match FromDeriveInput::from_derive_input(&derive_input).map(schema::schema_impl) { - Err(e) => e.write_errors().into(), - Ok(Ok(tokens)) => tokens.into(), - Ok(Err(e)) => e.into_compile_error().into(), +/// This trait can be derived using `#[derive(Schema)]` from the +/// [`serde_avro_derive`](https://docs.rs/serde_avro_derive/) crate +pub trait BuildSchema { + /// Obtain the [`struct@Schema`] for this type + fn schema() -> Schema { + Self::schema_mut() + .try_into() + .expect("Schema derive generated invalid schema") + } + /// Obtain the [`SchemaMut`] for this type + fn schema_mut() -> SchemaMut { + let mut builder = SchemaBuilder::default(); + assert_eq!(Self::build_schema(&mut builder).idx(), 0); + SchemaMut::from_nodes(builder.nodes) + } + + /// Largely internal method to build the schema. Registers the schema with + /// the builder. + fn build_schema(builder: &mut SchemaBuilder) -> SchemaKey; + type TypeLookup: std::any::Any; +} + +#[derive(Default)] +pub struct SchemaBuilder { + pub nodes: Vec, + pub already_built: HashMap, + _private: (), +} + +impl SchemaBuilder { + pub fn reserve(&mut self) -> SchemaKey { + let idx = self.nodes.len(); + self.nodes.push(SchemaNode::RegularType(RegularType::Null)); + SchemaKey::from_idx(idx) + } + + pub fn find_or_build(&mut self) -> SchemaKey { + match self.already_built.entry(TypeId::of::()) { + std::collections::hash_map::Entry::Occupied(entry) => *entry.get(), + std::collections::hash_map::Entry::Vacant(entry) => { + let expected_idx = SchemaKey::from_idx(self.nodes.len()); + entry.insert(expected_idx); + let idx = T::build_schema(self); + assert_eq!(idx, expected_idx); + idx + } + } + } +} + +macro_rules! 
impl_primitive { + ($($ty:ty, $variant:ident;)+) => { + $( + impl BuildSchema for $ty { + fn build_schema(builder: &mut SchemaBuilder) -> SchemaKey { + let schema_key = SchemaKey::from_idx(builder.nodes.len()); + builder.nodes.push(SchemaNode::RegularType(RegularType::$variant)); + schema_key + } + type TypeLookup = Self; + } + )* + }; +} +impl_primitive!( + (), Null; + bool, Boolean; + i32, Int; + i64, Long; + f32, Float; + f64, Double; + String, String; + Vec, Bytes; +); + +macro_rules! delegate_impl { + ($($ty:ty, $to:ty;)+) => { + $( + impl BuildSchema for $ty { + fn build_schema(builder: &mut SchemaBuilder) -> SchemaKey { + <$to as BuildSchema>::build_schema(builder) + } + type TypeLookup = <$to as BuildSchema>::TypeLookup; + } + )* + }; +} +delegate_impl! { + &'_ str, String; + &'_ [u8], Vec; + u16, i32; + u32, i64; + u64, i64; + i8, i32; + i16, i32; +} + +impl BuildSchema for Vec { + fn build_schema(builder: &mut SchemaBuilder) -> SchemaKey { + let reserved_schema_key = builder.reserve(); + let new_node = + SchemaNode::RegularType(RegularType::Array(Array::new(builder.find_or_build::()))); + builder.nodes[reserved_schema_key.idx()] = new_node; + reserved_schema_key + } + + type TypeLookup = Vec; +} + +impl BuildSchema for &'_ [T] { + fn build_schema(builder: &mut SchemaBuilder) -> SchemaKey { + as BuildSchema>::build_schema(builder) + } + type TypeLookup = as BuildSchema>::TypeLookup; +} + +impl BuildSchema for Option { + fn build_schema(builder: &mut SchemaBuilder) -> SchemaKey { + let reserved_schema_key = builder.reserve(); + let new_node = SchemaNode::RegularType(RegularType::Union(Union::new(vec![ + builder.find_or_build::<()>(), + builder.find_or_build::(), + ]))); + builder.nodes[reserved_schema_key.idx()] = new_node; + reserved_schema_key + } + + type TypeLookup = Option; +} + +impl BuildSchema for [u8; N] { + fn build_schema(builder: &mut SchemaBuilder) -> SchemaKey { + let schema_key = SchemaKey::from_idx(builder.nodes.len()); + builder + .nodes + 
.push(SchemaNode::RegularType(RegularType::Fixed(Fixed::new( + Name::from_fully_qualified_name(format!("u8_array_{}", N)), + N, + )))); + schema_key + } + type TypeLookup = Self; +} + +impl, V: BuildSchema> BuildSchema for HashMap { + fn build_schema(builder: &mut SchemaBuilder) -> SchemaKey { + let reserved_schema_key = builder.reserve(); + let new_node = + SchemaNode::RegularType(RegularType::Map(Map::new(builder.find_or_build::()))); + builder.nodes[reserved_schema_key.idx()] = new_node; + reserved_schema_key + } + type TypeLookup = HashMap; +} +impl, V: BuildSchema> BuildSchema + for std::collections::BTreeMap +{ + fn build_schema(builder: &mut SchemaBuilder) -> SchemaKey { + as BuildSchema>::build_schema(builder) } + type TypeLookup = as BuildSchema>::TypeLookup; } diff --git a/serde_avro_derive/tests/basic.rs b/serde_avro_derive/tests/basic.rs index 4005eb7..953f313 100644 --- a/serde_avro_derive/tests/basic.rs +++ b/serde_avro_derive/tests/basic.rs @@ -1,4 +1,4 @@ -use serde_avro_fast::schema::BuildSchema; +use serde_avro_derive::BuildSchema; use pretty_assertions::assert_eq; diff --git a/serde_avro_derive_macros/Cargo.toml b/serde_avro_derive_macros/Cargo.toml new file mode 100644 index 0000000..712eed6 --- /dev/null +++ b/serde_avro_derive_macros/Cargo.toml @@ -0,0 +1,22 @@ +[package] + authors = ["Thomas BESSOU "] + description = "Derive avro schema for Rust structs for serde_avro_fast" + edition = "2021" + license = "LGPL-3.0-only" + name = "serde_avro_derive_macros" + repository = "https://github.com/Ten0/serde_avro_fast" + version = "0.1.0" + workspace = ".." 
+ +[lib] + proc-macro = true + +[dependencies] + darling = "0.20" + proc-macro2 = "1" + quote = "1" + syn = { version = "2", features = ["visit-mut"] } + +[dev-dependencies] + serde_avro_derive = { path = "../serde_avro_derive" } + serde_json = "1" diff --git a/serde_avro_derive_macros/src/lib.rs b/serde_avro_derive_macros/src/lib.rs new file mode 100644 index 0000000..4b25df8 --- /dev/null +++ b/serde_avro_derive_macros/src/lib.rs @@ -0,0 +1,64 @@ +//! Internal macros crate for the `serde_avro_derive` crate +//! +//! Use [`serde_avro_derive`](https://docs.rs/serde_avro_derive/) instead of using this crate directly + +mod schema; + +use darling::FromDeriveInput; + +#[proc_macro_derive(Schema, attributes(avro_schema))] +/// Derive the ability to build an Avro schema for a type +/// +/// # Example +/// ``` +/// #[derive(serde_avro_derive::Schema)] +/// struct Foo { +/// primitives: Bar, +/// } +/// +/// #[derive(serde_avro_derive::Schema)] +/// #[allow(unused)] +/// struct Bar { +/// a: i32, +/// b: String, +/// } +/// +/// // The [`serde_avro_fast::schema::BuildSchema`] implementation will +/// // generate the following schema: +/// +/// let schema = r#"{ +/// "type": "record", +/// "name": "rust_out.Foo", +/// "fields": [ +/// { +/// "name": "primitives", +/// "type": { +/// "type": "record", +/// "name": "Bar", +/// "fields": [ +/// { +/// "name": "a", +/// "type": "int" +/// }, +/// { +/// "name": "b", +/// "type": "string" +/// } +/// ] +/// } +/// } +/// ] +/// }"#; +/// # use serde_avro_derive::BuildSchema; +/// # let actual_schema = serde_json::to_string_pretty(&Foo::schema_mut()).unwrap(); +/// # assert_eq!(actual_schema, schema); +/// ``` +pub fn schema_derive(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let derive_input = syn::parse_macro_input!(input as syn::DeriveInput); + + match FromDeriveInput::from_derive_input(&derive_input).map(schema::schema_impl) { + Err(e) => e.write_errors().into(), + Ok(Ok(tokens)) => tokens.into(), + 
Ok(Err(e)) => e.into_compile_error().into(), + } +} diff --git a/serde_avro_derive/src/schema.rs b/serde_avro_derive_macros/src/schema.rs similarity index 88% rename from serde_avro_derive/src/schema.rs rename to serde_avro_derive_macros/src/schema.rs index 649c265..38a746e 100644 --- a/serde_avro_derive/src/schema.rs +++ b/serde_avro_derive_macros/src/schema.rs @@ -45,10 +45,10 @@ pub(crate) fn schema_impl(input: SchemaDeriveInput) -> Result schema::SchemaKey { + impl #impl_generics serde_avro_derive::BuildSchema for #ident #ty_generics #where_clause { + fn build_schema(builder: &mut serde_avro_derive::SchemaBuilder) -> schema::SchemaKey { let reserved_schema_key = builder.reserve(); let mut struct_name = module_path!().replace("::", "."); struct_name.push('.'); @@ -59,7 +59,7 @@ pub(crate) fn schema_impl(input: SchemaDeriveInput) -> Result(builder), + builder.find_or_build::<#field_types>(), ), )*], ), diff --git a/serde_avro_fast/src/schema/builder.rs b/serde_avro_fast/src/schema/builder.rs deleted file mode 100644 index 962fef9..0000000 --- a/serde_avro_fast/src/schema/builder.rs +++ /dev/null @@ -1,160 +0,0 @@ -use std::{any::TypeId, collections::HashMap}; - -use crate::schema::*; - -/// We can automatically build a schema for this type (can be `derive`d) -/// -/// This trait can be derived using `#[derive(Schema)]` from the -/// [`serde_avro_derive`](https://docs.rs/serde_avro_derive/) crate -pub trait BuildSchema { - fn schema() -> Schema { - Self::schema_mut() - .try_into() - .expect("Schema derive generated invalid schema") - } - fn schema_mut() -> SchemaMut; -} -impl BuildSchema for T { - fn schema_mut() -> SchemaMut { - let mut builder = SchemaBuilder::default(); - assert_eq!(T::build(&mut builder).idx(), 0); - SchemaMut::from_nodes(builder.nodes) - } -} - -#[derive(Default)] -pub struct SchemaBuilder { - pub nodes: Vec, - pub already_built: HashMap, -} - -impl SchemaBuilder { - pub fn reserve(&mut self) -> SchemaKey { - let idx = self.nodes.len(); - 
self.nodes.push(SchemaNode::RegularType(RegularType::Null)); - SchemaKey::from_idx(idx) - } -} - -pub trait BuildSchemaInner { - fn build(builder: &mut SchemaBuilder) -> SchemaKey; - type TypeLookup: std::any::Any; -} - -pub fn node_idx(builder: &mut SchemaBuilder) -> SchemaKey { - match builder.already_built.entry(TypeId::of::()) { - std::collections::hash_map::Entry::Occupied(entry) => *entry.get(), - std::collections::hash_map::Entry::Vacant(entry) => { - let expected_idx = SchemaKey::from_idx(builder.nodes.len()); - entry.insert(expected_idx); - let idx = T::build(builder); - assert_eq!(idx, expected_idx); - idx - } - } -} - -macro_rules! impl_primitive { - ($($ty:ty, $variant:ident;)+) => { - $( - impl BuildSchemaInner for $ty { - fn build(builder: &mut SchemaBuilder) -> SchemaKey { - let schema_key = SchemaKey::from_idx(builder.nodes.len()); - builder.nodes.push(SchemaNode::RegularType(RegularType::$variant)); - schema_key - } - type TypeLookup = Self; - } - )* - }; -} -impl_primitive!( - (), Null; - bool, Boolean; - i32, Int; - i64, Long; - f32, Float; - f64, Double; - String, String; - Vec, Bytes; -); - -macro_rules! delegate_impl { - ($($ty:ty, $to:ty;)+) => { - $( - impl BuildSchemaInner for $ty { - fn build(builder: &mut SchemaBuilder) -> SchemaKey { - <$to as BuildSchemaInner>::build(builder) - } - type TypeLookup = <$to as BuildSchemaInner>::TypeLookup; - } - )* - }; -} -delegate_impl! 
{ - &'_ str, String; - &'_ [u8], Vec; -} - -impl BuildSchemaInner for Vec { - fn build(builder: &mut SchemaBuilder) -> SchemaKey { - let reserved_schema_key = builder.reserve(); - let new_node = - SchemaNode::RegularType(RegularType::Array(Array::new(node_idx::(builder)))); - builder.nodes[reserved_schema_key.idx()] = new_node; - reserved_schema_key - } - - type TypeLookup = Vec; -} - -impl BuildSchemaInner for &'_ [T] { - fn build(builder: &mut SchemaBuilder) -> SchemaKey { - as BuildSchemaInner>::build(builder) - } - type TypeLookup = as BuildSchemaInner>::TypeLookup; -} - -impl BuildSchemaInner for Option { - fn build(builder: &mut SchemaBuilder) -> SchemaKey { - let reserved_schema_key = builder.reserve(); - let new_node = SchemaNode::RegularType(RegularType::Union(Union::new(vec![ - node_idx::<()>(builder), - node_idx::(builder), - ]))); - builder.nodes[reserved_schema_key.idx()] = new_node; - reserved_schema_key - } - - type TypeLookup = Option; -} - -impl BuildSchemaInner for [u8; N] { - fn build(builder: &mut SchemaBuilder) -> SchemaKey { - let schema_key = SchemaKey::from_idx(builder.nodes.len()); - builder - .nodes - .push(SchemaNode::RegularType(RegularType::Fixed(Fixed::new( - Name::from_fully_qualified_name(format!("u8_array_{}", N)), - N, - )))); - schema_key - } - type TypeLookup = Self; -} - -impl BuildSchemaInner for HashMap { - fn build(builder: &mut SchemaBuilder) -> SchemaKey { - let reserved_schema_key = builder.reserve(); - let new_node = SchemaNode::RegularType(RegularType::Map(Map::new(node_idx::(builder)))); - builder.nodes[reserved_schema_key.idx()] = new_node; - reserved_schema_key - } - type TypeLookup = HashMap; -} -impl BuildSchemaInner for std::collections::BTreeMap { - fn build(builder: &mut SchemaBuilder) -> SchemaKey { - as BuildSchemaInner>::build(builder) - } - type TypeLookup = as BuildSchemaInner>::TypeLookup; -} diff --git a/serde_avro_fast/src/schema/mod.rs b/serde_avro_fast/src/schema/mod.rs index 5d31ddc..6f5fe87 100644 --- 
a/serde_avro_fast/src/schema/mod.rs +++ b/serde_avro_fast/src/schema/mod.rs @@ -1,15 +1,11 @@ //! Navigate, modify and initialize the [`Schema`] -// For now I'll consider this not stable so not public API -#[doc(hidden)] -pub mod builder; - mod error; mod safe; pub(crate) mod self_referential; mod union_variants_per_type_lookup; -pub use {builder::BuildSchema, error::SchemaError, safe::*, self_referential::Schema}; +pub use {error::SchemaError, safe::*, self_referential::Schema}; pub(crate) use union_variants_per_type_lookup::UnionVariantLookupKey; From bb2f933e5ee0e11d7ead4e6b47f4165967f8acd0 Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sat, 9 Mar 2024 23:49:03 +0100 Subject: [PATCH 06/15] naming and documentation improvements --- serde_avro_derive/src/lib.rs | 138 +++++++++++++++++-------- serde_avro_derive_macros/src/lib.rs | 11 +- serde_avro_derive_macros/src/schema.rs | 5 +- 3 files changed, 107 insertions(+), 47 deletions(-) diff --git a/serde_avro_derive/src/lib.rs b/serde_avro_derive/src/lib.rs index df50f88..20688dd 100644 --- a/serde_avro_derive/src/lib.rs +++ b/serde_avro_derive/src/lib.rs @@ -1,3 +1,8 @@ +//! Bring automatic Avro Schema generation to [`serde_avro_fast`] +//! +//! See the [`#[derive(Schema)]`](derive@Schema) documentation for more +//! 
information + pub use serde_avro_fast; pub use serde_avro_derive_macros::*; @@ -8,50 +13,81 @@ use serde_avro_fast::schema::*; /// We can automatically build a schema for this type (can be `derive`d) /// -/// This trait can be derived using `#[derive(Schema)]` from the -/// [`serde_avro_derive`](https://docs.rs/serde_avro_derive/) crate +/// This trait can be derived using [`#[derive(Schema)]`](derive@Schema) pub trait BuildSchema { - /// Obtain the [`struct@Schema`] for this type + /// Build a [`struct@Schema`] for this type fn schema() -> Schema { Self::schema_mut() .try_into() .expect("Schema derive generated invalid schema") } - /// Obtain the [`SchemaMut`] for this type + /// Build a [`SchemaMut`] for this type fn schema_mut() -> SchemaMut { let mut builder = SchemaBuilder::default(); - assert_eq!(Self::build_schema(&mut builder).idx(), 0); + Self::append_schema(&mut builder); SchemaMut::from_nodes(builder.nodes) } - /// Largely internal method to build the schema. Registers the schema with + /// Largely internal method to build the schema. Registers the schema within /// the builder. - fn build_schema(builder: &mut SchemaBuilder) -> SchemaKey; + /// + /// This does not check if this type already exists in the builder, so it + /// should never be called directly (instead, use + /// [`SchemaBuilder::find_or_build`]) + /// + /// The [`SchemaNode`] for this type should be put at the current end of the + /// `nodes` array, and its non-already-built dependencies should be put + /// after in the array. + fn append_schema(builder: &mut SchemaBuilder); + + /// Largely internal type used by [`#[derive(Schema)]`](derive@Schema) + /// + /// The TypeId of this type will be used to lookup whether the + /// [`SchemaNode`] for this type has already been built in the + /// [`SchemaBuilder`]. 
+ /// + /// This indirection is required to allow non-static types to implement + /// [`BuildSchema`], and also enables using the same node for types that we + /// know map to the same schema. type TypeLookup: std::any::Any; } +/// Largely internal type used by [`#[derive(Schema)]`](derive@Schema) +/// +/// You should typically not use this directly #[derive(Default)] pub struct SchemaBuilder { pub nodes: Vec, - pub already_built: HashMap, + pub already_built_types: HashMap, _private: (), } impl SchemaBuilder { - pub fn reserve(&mut self) -> SchemaKey { + /// Reserve a slot in the `nodes` array + /// + /// After building the `SchemaNode`, it should be put at the corresponding + /// position in `nodes`. + pub fn reserve(&mut self) -> usize { let idx = self.nodes.len(); self.nodes.push(SchemaNode::RegularType(RegularType::Null)); - SchemaKey::from_idx(idx) + idx } pub fn find_or_build(&mut self) -> SchemaKey { - match self.already_built.entry(TypeId::of::()) { + match self + .already_built_types + .entry(TypeId::of::()) + { std::collections::hash_map::Entry::Occupied(entry) => *entry.get(), std::collections::hash_map::Entry::Vacant(entry) => { - let expected_idx = SchemaKey::from_idx(self.nodes.len()); - entry.insert(expected_idx); - let idx = T::build_schema(self); - assert_eq!(idx, expected_idx); + let idx = SchemaKey::from_idx(self.nodes.len()); + entry.insert(idx); + T::append_schema(self); + assert!( + self.nodes.len() > idx.idx(), + "append_schema should always insert at least a node \ + (and its dependencies below itself)" + ); idx } } @@ -62,10 +98,8 @@ macro_rules! 
impl_primitive { ($($ty:ty, $variant:ident;)+) => { $( impl BuildSchema for $ty { - fn build_schema(builder: &mut SchemaBuilder) -> SchemaKey { - let schema_key = SchemaKey::from_idx(builder.nodes.len()); + fn append_schema(builder: &mut SchemaBuilder) { builder.nodes.push(SchemaNode::RegularType(RegularType::$variant)); - schema_key } type TypeLookup = Self; } @@ -83,90 +117,112 @@ impl_primitive!( Vec, Bytes; ); -macro_rules! delegate_impl { +macro_rules! impl_forward { ($($ty:ty, $to:ty;)+) => { $( impl BuildSchema for $ty { - fn build_schema(builder: &mut SchemaBuilder) -> SchemaKey { - <$to as BuildSchema>::build_schema(builder) + fn append_schema(builder: &mut SchemaBuilder) { + <$to as BuildSchema>::append_schema(builder) } type TypeLookup = <$to as BuildSchema>::TypeLookup; } )* }; } -delegate_impl! { - &'_ str, String; - &'_ [u8], Vec; +impl_forward! { + str, String; + [u8], Vec; u16, i32; u32, i64; u64, i64; i8, i32; i16, i32; + usize, i64; +} + +macro_rules! impl_ptr { + ($($($ty_path:ident)::+,)+) => { + $( + impl BuildSchema for $($ty_path)::+ { + fn append_schema(builder: &mut SchemaBuilder) { + ::append_schema(builder) + } + type TypeLookup = T::TypeLookup; + } + )* + }; +} +impl_ptr! 
{ + Box, + std::sync::Arc, + std::rc::Rc, + std::cell::RefCell, + std::cell::Cell, +} +impl BuildSchema for &'_ T { + fn append_schema(builder: &mut SchemaBuilder) { + ::append_schema(builder) + } + type TypeLookup = T::TypeLookup; } impl BuildSchema for Vec { - fn build_schema(builder: &mut SchemaBuilder) -> SchemaKey { + fn append_schema(builder: &mut SchemaBuilder) { let reserved_schema_key = builder.reserve(); let new_node = SchemaNode::RegularType(RegularType::Array(Array::new(builder.find_or_build::()))); - builder.nodes[reserved_schema_key.idx()] = new_node; - reserved_schema_key + builder.nodes[reserved_schema_key] = new_node; } type TypeLookup = Vec; } -impl BuildSchema for &'_ [T] { - fn build_schema(builder: &mut SchemaBuilder) -> SchemaKey { - as BuildSchema>::build_schema(builder) +impl BuildSchema for [T] { + fn append_schema(builder: &mut SchemaBuilder) { + as BuildSchema>::append_schema(builder) } type TypeLookup = as BuildSchema>::TypeLookup; } impl BuildSchema for Option { - fn build_schema(builder: &mut SchemaBuilder) -> SchemaKey { + fn append_schema(builder: &mut SchemaBuilder) { let reserved_schema_key = builder.reserve(); let new_node = SchemaNode::RegularType(RegularType::Union(Union::new(vec![ builder.find_or_build::<()>(), builder.find_or_build::(), ]))); - builder.nodes[reserved_schema_key.idx()] = new_node; - reserved_schema_key + builder.nodes[reserved_schema_key] = new_node; } type TypeLookup = Option; } impl BuildSchema for [u8; N] { - fn build_schema(builder: &mut SchemaBuilder) -> SchemaKey { - let schema_key = SchemaKey::from_idx(builder.nodes.len()); + fn append_schema(builder: &mut SchemaBuilder) { builder .nodes .push(SchemaNode::RegularType(RegularType::Fixed(Fixed::new( Name::from_fully_qualified_name(format!("u8_array_{}", N)), N, )))); - schema_key } type TypeLookup = Self; } impl, V: BuildSchema> BuildSchema for HashMap { - fn build_schema(builder: &mut SchemaBuilder) -> SchemaKey { + fn append_schema(builder: &mut 
SchemaBuilder) { let reserved_schema_key = builder.reserve(); let new_node = SchemaNode::RegularType(RegularType::Map(Map::new(builder.find_or_build::()))); - builder.nodes[reserved_schema_key.idx()] = new_node; - reserved_schema_key + builder.nodes[reserved_schema_key] = new_node; } type TypeLookup = HashMap; } impl, V: BuildSchema> BuildSchema for std::collections::BTreeMap { - fn build_schema(builder: &mut SchemaBuilder) -> SchemaKey { - as BuildSchema>::build_schema(builder) + fn append_schema(builder: &mut SchemaBuilder) { + as BuildSchema>::append_schema(builder) } type TypeLookup = as BuildSchema>::TypeLookup; } diff --git a/serde_avro_derive_macros/src/lib.rs b/serde_avro_derive_macros/src/lib.rs index 4b25df8..668a4b7 100644 --- a/serde_avro_derive_macros/src/lib.rs +++ b/serde_avro_derive_macros/src/lib.rs @@ -8,6 +8,7 @@ use darling::FromDeriveInput; #[proc_macro_derive(Schema, attributes(avro_schema))] /// Derive the ability to build an Avro schema for a type +/// (implements `BuildSchema`) /// /// # Example /// ``` @@ -23,10 +24,11 @@ use darling::FromDeriveInput; /// b: String, /// } /// +/// let schema = Foo::schema(); +/// /// // The [`serde_avro_fast::schema::BuildSchema`] implementation will /// // generate the following schema: -/// -/// let schema = r#"{ +/// let schema_str = r#"{ /// "type": "record", /// "name": "rust_out.Foo", /// "fields": [ @@ -49,9 +51,12 @@ use darling::FromDeriveInput; /// } /// ] /// }"#; +/// // Note that the `rust_out` namespace here is only due to the fact this is a doctest: +/// // the name will always be crate_name.path.to.module.Foo +/// // (but for doctests the crate is called rust_out and the struct is at top level) /// # use serde_avro_derive::BuildSchema; /// # let actual_schema = serde_json::to_string_pretty(&Foo::schema_mut()).unwrap(); -/// # assert_eq!(actual_schema, schema); +/// # assert_eq!(actual_schema, schema_str); /// ``` pub fn schema_derive(input: proc_macro::TokenStream) -> 
proc_macro::TokenStream { let derive_input = syn::parse_macro_input!(input as syn::DeriveInput); diff --git a/serde_avro_derive_macros/src/schema.rs b/serde_avro_derive_macros/src/schema.rs index 38a746e..fa10494 100644 --- a/serde_avro_derive_macros/src/schema.rs +++ b/serde_avro_derive_macros/src/schema.rs @@ -48,7 +48,7 @@ pub(crate) fn schema_impl(input: SchemaDeriveInput) -> Result schema::SchemaKey { + fn append_schema(builder: &mut serde_avro_derive::SchemaBuilder) { let reserved_schema_key = builder.reserve(); let mut struct_name = module_path!().replace("::", "."); struct_name.push('.'); @@ -64,8 +64,7 @@ pub(crate) fn schema_impl(input: SchemaDeriveInput) -> Result Date: Sun, 10 Mar 2024 02:14:16 +0100 Subject: [PATCH 07/15] Generics support --- serde_avro_derive/src/lib.rs | 18 +++ serde_avro_derive/tests/basic.rs | 76 +++++++++--- serde_avro_derive_macros/Cargo.toml | 2 +- serde_avro_derive_macros/src/schema.rs | 161 +++++++++++++++++++++++-- 4 files changed, 226 insertions(+), 31 deletions(-) diff --git a/serde_avro_derive/src/lib.rs b/serde_avro_derive/src/lib.rs index 20688dd..64818f0 100644 --- a/serde_avro_derive/src/lib.rs +++ b/serde_avro_derive/src/lib.rs @@ -165,6 +165,12 @@ impl BuildSchema for &'_ T { } type TypeLookup = T::TypeLookup; } +impl BuildSchema for &'_ mut T { + fn append_schema(builder: &mut SchemaBuilder) { + ::append_schema(builder) + } + type TypeLookup = T::TypeLookup; +} impl BuildSchema for Vec { fn append_schema(builder: &mut SchemaBuilder) { @@ -226,3 +232,15 @@ impl, V: BuildSchema> BuildSchema } type TypeLookup = as BuildSchema>::TypeLookup; } + +#[doc(hidden)] +pub fn hash_type_id(struct_name: &mut String, type_id: TypeId) { + use std::{ + fmt::Write, + hash::{Hash as _, Hasher as _}, + }; + #[allow(deprecated)] // I actually want to not change hasher + let mut hasher = std::hash::SipHasher::new(); + type_id.hash(&mut hasher); + write!(struct_name, "_{:016x?}", hasher.finish()).unwrap(); +} diff --git 
a/serde_avro_derive/tests/basic.rs b/serde_avro_derive/tests/basic.rs index 953f313..ec784ba 100644 --- a/serde_avro_derive/tests/basic.rs +++ b/serde_avro_derive/tests/basic.rs @@ -2,10 +2,10 @@ use serde_avro_derive::BuildSchema; use pretty_assertions::assert_eq; -#[derive(serde_avro_derive::Schema)] -struct Foo { - #[allow(unused)] - primitives: Bar, +fn test(expected: &str) { + let schema = serde_json::to_string_pretty(&T::schema_mut()).unwrap(); + println!("{schema}"); + assert_eq!(schema, expected); } #[derive(serde_avro_derive::Schema)] @@ -15,20 +15,6 @@ struct Bar { b: String, } -#[derive(serde_avro_derive::Schema)] -#[allow(unused)] -struct Complex { - s1: Foo, - s2: Foo, - vec: Vec, -} - -fn test(expected: &str) { - let schema = serde_json::to_string_pretty(&T::schema_mut()).unwrap(); - println!("{schema}"); - assert_eq!(schema, expected); -} - #[test] fn primitives() { test::( @@ -49,6 +35,12 @@ fn primitives() { ); } +#[derive(serde_avro_derive::Schema)] +struct Foo { + #[allow(unused)] + primitives: Bar, +} + #[test] fn substruct() { test::( @@ -78,6 +70,14 @@ fn substruct() { ); } +#[derive(serde_avro_derive::Schema)] +#[allow(unused)] +struct Complex { + s1: Foo, + s2: Foo, + vec: Vec, +} + #[test] fn complex() { test::( @@ -126,3 +126,43 @@ fn complex() { }"#, ); } + +#[derive(serde_avro_derive::Schema)] +#[allow(unused)] +struct Generics<'a, F> { + s1: F, + s2: &'a F, +} + +#[test] +fn generics() { + test::>( + r#"{ + "type": "record", + "name": "basic.Generics_be632cb05c10a877", + "fields": [ + { + "name": "s1", + "type": { + "type": "record", + "name": "Bar", + "fields": [ + { + "name": "a", + "type": "int" + }, + { + "name": "b", + "type": "string" + } + ] + } + }, + { + "name": "s2", + "type": "Bar" + } + ] +}"#, + ); +} diff --git a/serde_avro_derive_macros/Cargo.toml b/serde_avro_derive_macros/Cargo.toml index 712eed6..d21f426 100644 --- a/serde_avro_derive_macros/Cargo.toml +++ b/serde_avro_derive_macros/Cargo.toml @@ -15,7 +15,7 @@ darling 
= "0.20" proc-macro2 = "1" quote = "1" - syn = { version = "2", features = ["visit-mut"] } + syn = { version = "2", features = ["visit", "extra-traits"] } [dev-dependencies] serde_avro_derive = { path = "../serde_avro_derive" } diff --git a/serde_avro_derive_macros/src/schema.rs b/serde_avro_derive_macros/src/schema.rs index fa10494..3916b57 100644 --- a/serde_avro_derive_macros/src/schema.rs +++ b/serde_avro_derive_macros/src/schema.rs @@ -2,7 +2,8 @@ use { proc_macro2::{Span, TokenStream}, quote::{format_ident, quote}, syn::{ - visit_mut::{self, VisitMut}, + parse_quote, + visit::{self, Visit}, Error, }, }; @@ -30,18 +31,118 @@ pub(crate) fn schema_impl(input: SchemaDeriveInput) -> Result = + Default::default(); + let field_types = fields + .iter() + .map(|f| { + let mut ty = &f.ty; + while let syn::Type::Reference(r) = ty { + // This allows not requiring the user to specify that T: 'a + // as an explicit where predicate, and simplifies the calls + ty = &r.elem; + } + if !generics.params.is_empty() { + let mut is_relevant_generic = IsRelevantGeneric { + generics: &generics, + result: false, + }; + is_relevant_generic.visit_type(ty); + if is_relevant_generic.result { + if added_where_clause_predicate_for_types.insert(ty) { + generics + .make_where_clause() + .predicates + .push(parse_quote!(#ty: serde_avro_derive::BuildSchema)); + } + } + } + ty + }) + .collect::>(); + + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); let field_names = fields .iter() .map(|f| f.ident.as_ref().map(|i| i.to_string())) .collect::>>() .ok_or_else(|| Error::new(Span::call_site(), "Unnamed fields are not supported"))?; - let field_types = fields.iter().map(|f| &f.ty); - let mut generics_staticified = input.generics.clone(); - TurnLifetimesToStatic.visit_generics_mut(&mut generics_staticified); - let (_, ty_generics_staticified, _) = generics_staticified.split_for_impl(); + let has_generics = !generics.params.is_empty(); + let (type_lookup, type_lookup_decl): 
(syn::Type, _) = match has_generics { + false => (parse_quote!(Self), None), + true => { + // The struct we are deriving on is generic, but we need the TypeLookup to be + // 'static otherwise it won't implement `Any`, so we need to generate a + // dedicated struct for it. + let type_lookup_ident = format_ident!("{ident}TypeLookup"); + let type_params: Vec = (0..generics.params.len()) + .map(|i| format_ident!("T{}", i)) + .collect(); + let struct_decl = syn::ItemStruct { + attrs: Default::default(), + vis: syn::Visibility::Inherited, + struct_token: syn::token::Struct::default(), + ident: type_lookup_ident.clone(), + generics: syn::Generics { + lt_token: Some(Default::default()), + params: type_params + .iter() + .map(|ident| -> syn::GenericParam { parse_quote!(#ident) }) + .collect(), + gt_token: Some(Default::default()), + where_clause: None, + }, + fields: syn::Fields::Named(syn::FieldsNamed { + named: fields + .iter() + .zip(&type_params) + .map(|(field, ident)| syn::Field { + attrs: Default::default(), + vis: syn::Visibility::Inherited, + ident: field.ident.clone(), + colon_token: Some(Default::default()), + ty: { parse_quote!(#ident) }, + mutability: syn::FieldMutability::None, + }) + .collect(), + brace_token: Default::default(), + }), + semi_token: None, + }; + let type_lookup = syn::PathSegment { + ident: type_lookup_ident, + arguments: syn::PathArguments::AngleBracketed( + syn::AngleBracketedGenericArguments { + args: field_types + .iter() + .map(|ty| -> syn::GenericArgument { + parse_quote!(<#ty as serde_avro_derive::BuildSchema>::TypeLookup) + }) + .collect(), + colon2_token: Default::default(), + lt_token: Default::default(), + gt_token: Default::default(), + }, + ), + }; + (parse_quote!(#type_lookup), Some(struct_decl)) + } + }; + + let add_type_id_to_fqn = if has_generics { + quote! { + serde_avro_derive::hash_type_id( + &mut struct_name, + std::any::TypeId::of::<::TypeLookup>(), + ); + } + } else { + quote! {} + }; Ok(quote! 
{ const _: () = { @@ -53,6 +154,7 @@ pub(crate) fn schema_impl(input: SchemaDeriveInput) -> Result Result { + generics: &'a syn::Generics, + result: bool, +} +impl Visit<'_> for IsRelevantGeneric<'_> { + fn visit_type(&mut self, v: &syn::Type) { + match v { + syn::Type::Path(v) => { + if let Some(v) = v.path.get_ident() { + if self.generics.params.iter().any(|p| match p { + syn::GenericParam::Type(t) => t.ident == *v, + _ => false, + }) { + self.result = true; + } + } + } + _ => {} + } + visit::visit_type(self, v); + } + fn visit_lifetime(&mut self, v: &syn::Lifetime) { + if self.generics.params.iter().any(|p| match p { + syn::GenericParam::Lifetime(l) => l.lifetime == *v, + _ => false, + }) { + self.result = true; + } + visit::visit_lifetime(self, v) + } + fn visit_const_param(&mut self, v: &syn::ConstParam) { + if self.generics.params.iter().any(|p| match p { + syn::GenericParam::Const(c) => c == v, + _ => false, + }) { + self.result = true; + } + visit::visit_const_param(self, v) } } From 0348c3694df55d52710394edfb0b0a7c1da7cc96 Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sun, 10 Mar 2024 02:23:42 +0100 Subject: [PATCH 08/15] Don't unwrap Schema -> SchemaMut conversion --- serde_avro_derive/src/lib.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/serde_avro_derive/src/lib.rs b/serde_avro_derive/src/lib.rs index 64818f0..b94aebc 100644 --- a/serde_avro_derive/src/lib.rs +++ b/serde_avro_derive/src/lib.rs @@ -16,10 +16,8 @@ use serde_avro_fast::schema::*; /// This trait can be derived using [`#[derive(Schema)]`](derive@Schema) pub trait BuildSchema { /// Build a [`struct@Schema`] for this type - fn schema() -> Schema { - Self::schema_mut() - .try_into() - .expect("Schema derive generated invalid schema") + fn schema() -> Result { + Self::schema_mut().try_into() } /// Build a [`SchemaMut`] for this type fn schema_mut() -> SchemaMut { From 886ecb2370a716220ed5661748c5d7208e1a7c29 Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: 
Sun, 10 Mar 2024 10:58:44 +0100 Subject: [PATCH 09/15] fix for &str --- serde_avro_derive/src/lib.rs | 2 +- .../tests/{basic.rs => derive_schema.rs} | 13 +++++++++---- serde_avro_derive_macros/src/schema.rs | 5 ++--- 3 files changed, 12 insertions(+), 8 deletions(-) rename serde_avro_derive/tests/{basic.rs => derive_schema.rs} (91%) diff --git a/serde_avro_derive/src/lib.rs b/serde_avro_derive/src/lib.rs index b94aebc..e12948b 100644 --- a/serde_avro_derive/src/lib.rs +++ b/serde_avro_derive/src/lib.rs @@ -71,7 +71,7 @@ impl SchemaBuilder { idx } - pub fn find_or_build(&mut self) -> SchemaKey { + pub fn find_or_build(&mut self) -> SchemaKey { match self .already_built_types .entry(TypeId::of::()) diff --git a/serde_avro_derive/tests/basic.rs b/serde_avro_derive/tests/derive_schema.rs similarity index 91% rename from serde_avro_derive/tests/basic.rs rename to serde_avro_derive/tests/derive_schema.rs index ec784ba..957dd48 100644 --- a/serde_avro_derive/tests/basic.rs +++ b/serde_avro_derive/tests/derive_schema.rs @@ -20,7 +20,7 @@ fn primitives() { test::( r#"{ "type": "record", - "name": "basic.Bar", + "name": "derive_schema.Bar", "fields": [ { "name": "a", @@ -46,7 +46,7 @@ fn substruct() { test::( r#"{ "type": "record", - "name": "basic.Foo", + "name": "derive_schema.Foo", "fields": [ { "name": "primitives", @@ -83,7 +83,7 @@ fn complex() { test::( r#"{ "type": "record", - "name": "basic.Complex", + "name": "derive_schema.Complex", "fields": [ { "name": "s1", @@ -132,6 +132,7 @@ fn complex() { struct Generics<'a, F> { s1: F, s2: &'a F, + s: &'a str, } #[test] @@ -139,7 +140,7 @@ fn generics() { test::>( r#"{ "type": "record", - "name": "basic.Generics_be632cb05c10a877", + "name": "derive_schema.Generics_62462e653c3a8376", "fields": [ { "name": "s1", @@ -161,6 +162,10 @@ fn generics() { { "name": "s2", "type": "Bar" + }, + { + "name": "s", + "type": "string" } ] }"#, diff --git a/serde_avro_derive_macros/src/schema.rs b/serde_avro_derive_macros/src/schema.rs index 
3916b57..602368b 100644 --- a/serde_avro_derive_macros/src/schema.rs +++ b/serde_avro_derive_macros/src/schema.rs @@ -79,9 +79,8 @@ pub(crate) fn schema_impl(input: SchemaDeriveInput) -> Result = (0..generics.params.len()) - .map(|i| format_ident!("T{}", i)) - .collect(); + let type_params: Vec = + (0..fields.len()).map(|i| format_ident!("T{}", i)).collect(); let struct_decl = syn::ItemStruct { attrs: Default::default(), vis: syn::Visibility::Inherited, From 76989fa4ef0efb3bf538add735a7dfac10750a7b Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sun, 10 Mar 2024 11:07:46 +0100 Subject: [PATCH 10/15] add doc --- serde_avro_derive_macros/src/schema.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/serde_avro_derive_macros/src/schema.rs b/serde_avro_derive_macros/src/schema.rs index 602368b..ff5d8d3 100644 --- a/serde_avro_derive_macros/src/schema.rs +++ b/serde_avro_derive_macros/src/schema.rs @@ -78,6 +78,22 @@ pub(crate) fn schema_impl(input: SchemaDeriveInput) -> Result { + // f1: Bar, + // f2: Baz; + // } + // We'll generate + // struct FooTypeLookup { + // f1: T0, + // f1: T1, + // } + // and then use type TypeLookup = + // TypeLookup< + // ::TypeLookup, + // ::TypeLookup, + // >; let type_lookup_ident = format_ident!("{ident}TypeLookup"); let type_params: Vec = (0..fields.len()).map(|i| format_ident!("T{}", i)).collect(); From e3f23ddc579be716998bb79b484275509c1543da Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sun, 10 Mar 2024 11:24:59 +0100 Subject: [PATCH 11/15] Simplify generated struct when only generic on lifetimes + add skip attribute --- serde_avro_derive/tests/derive_schema.rs | 29 +++++++++++- serde_avro_derive_macros/Cargo.toml | 2 +- serde_avro_derive_macros/src/schema.rs | 57 +++++++++++++++++------- 3 files changed, 70 insertions(+), 18 deletions(-) diff --git a/serde_avro_derive/tests/derive_schema.rs b/serde_avro_derive/tests/derive_schema.rs index 957dd48..0f23dc5 100644 --- 
a/serde_avro_derive/tests/derive_schema.rs +++ b/serde_avro_derive/tests/derive_schema.rs @@ -140,7 +140,7 @@ fn generics() { test::>( r#"{ "type": "record", - "name": "derive_schema.Generics_62462e653c3a8376", + "name": "derive_schema.Generics_b8f49e32140be9d5", "fields": [ { "name": "s1", @@ -171,3 +171,30 @@ fn generics() { }"#, ); } + +#[derive(serde_avro_derive::Schema)] +#[allow(unused)] +struct Lifetimes<'a, 'b> { + s: &'a [&'b str], + #[avro_schema(skip)] + z: String, +} + +#[test] +fn lifetimes() { + test::>( + r#"{ + "type": "record", + "name": "derive_schema.Lifetimes", + "fields": [ + { + "name": "s", + "type": { + "type": "array", + "items": "string" + } + } + ] +}"#, + ); +} diff --git a/serde_avro_derive_macros/Cargo.toml b/serde_avro_derive_macros/Cargo.toml index d21f426..719e129 100644 --- a/serde_avro_derive_macros/Cargo.toml +++ b/serde_avro_derive_macros/Cargo.toml @@ -15,7 +15,7 @@ darling = "0.20" proc-macro2 = "1" quote = "1" - syn = { version = "2", features = ["visit", "extra-traits"] } + syn = { version = "2", features = ["visit", "visit-mut", "extra-traits"] } [dev-dependencies] serde_avro_derive = { path = "../serde_avro_derive" } diff --git a/serde_avro_derive_macros/src/schema.rs b/serde_avro_derive_macros/src/schema.rs index ff5d8d3..214cc43 100644 --- a/serde_avro_derive_macros/src/schema.rs +++ b/serde_avro_derive_macros/src/schema.rs @@ -4,6 +4,7 @@ use { syn::{ parse_quote, visit::{self, Visit}, + visit_mut::{self, VisitMut}, Error, }, }; @@ -11,26 +12,29 @@ use { #[derive(darling::FromDeriveInput)] #[darling(attributes(avro_schema), supports(struct_named))] pub(crate) struct SchemaDeriveInput { - pub(super) ident: proc_macro2::Ident, - pub(super) data: darling::ast::Data<(), SchemaDeriveField>, - pub(super) generics: syn::Generics, + ident: proc_macro2::Ident, + data: darling::ast::Data<(), SchemaDeriveField>, + generics: syn::Generics, } #[derive(darling::FromField)] #[darling(attributes(avro_schema))] pub(crate) struct 
SchemaDeriveField { - pub(super) ident: Option, - pub(super) ty: syn::Type, + ident: Option, + ty: syn::Type, + + skip: darling::util::Flag, } pub(crate) fn schema_impl(input: SchemaDeriveInput) -> Result { - let fields = input + let mut fields = input .data .take_struct() .expect("Supports directive should prevent enums"); + fields.fields.retain(|f| !f.skip.is_present()); - let ident = &input.ident; - let struct_name = ident.to_string(); + let struct_ident = &input.ident; + let struct_name = struct_ident.to_string(); let mut generics = input.generics; let mut added_where_clause_predicate_for_types: std::collections::HashSet<_> = @@ -63,17 +67,28 @@ pub(crate) fn schema_impl(input: SchemaDeriveInput) -> Result>(); - let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); - let field_names = fields .iter() .map(|f| f.ident.as_ref().map(|i| i.to_string())) .collect::>>() .ok_or_else(|| Error::new(Span::call_site(), "Unnamed fields are not supported"))?; - let has_generics = !generics.params.is_empty(); - let (type_lookup, type_lookup_decl): (syn::Type, _) = match has_generics { - false => (parse_quote!(Self), None), + let has_non_lifetime_generics = generics + .params + .iter() + .any(|gp| !matches!(gp, syn::GenericParam::Lifetime(_))); + let (type_lookup, type_lookup_decl): (syn::Type, _) = match has_non_lifetime_generics { + false => { + let type_lookup = if generics.params.is_empty() { + parse_quote!(Self) + } else { + let mut generics_static = generics.clone(); + TurnLifetimesToStatic.visit_generics_mut(&mut generics_static); + let (_, ty_generics, _) = generics_static.split_for_impl(); + parse_quote!(#struct_ident #ty_generics) + }; + (type_lookup, None) + } true => { // The struct we are deriving on is generic, but we need the TypeLookup to be // 'static otherwise it won't implement `Any`, so we need to generate a @@ -94,7 +109,7 @@ pub(crate) fn schema_impl(input: SchemaDeriveInput) -> Result::TypeLookup, // ::TypeLookup, // >; - let 
type_lookup_ident = format_ident!("{struct_ident}TypeLookup"); let type_params: Vec = (0..fields.len()).map(|i| format_ident!("T{}", i)).collect(); let struct_decl = syn::ItemStruct { @@ -148,7 +163,7 @@ pub(crate) fn schema_impl(input: SchemaDeriveInput) -> Result Result for IsRelevantGeneric<'_> { visit::visit_const_param(self, v) } } + +struct TurnLifetimesToStatic; +impl VisitMut for TurnLifetimesToStatic { + fn visit_lifetime_mut(&mut self, i: &mut syn::Lifetime) { + i.ident = format_ident!("static"); + visit_mut::visit_lifetime_mut(self, i) + } +} From efab4b7b1c345b49a34655f86aac3ddf11a1bfc3 Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sun, 10 Mar 2024 12:42:23 +0100 Subject: [PATCH 12/15] Add logical types support and make TYPEIDHASH deterministic in tests --- serde_avro_derive/Cargo.toml | 2 + serde_avro_derive/src/lib.rs | 13 +++ serde_avro_derive/tests/derive_schema.rs | 64 ++++++++++++- serde_avro_derive_macros/src/schema.rs | 112 ++++++++++++++++++++++- 4 files changed, 188 insertions(+), 3 deletions(-) diff --git a/serde_avro_derive/Cargo.toml b/serde_avro_derive/Cargo.toml index 1049015..151fe85 100644 --- a/serde_avro_derive/Cargo.toml +++ b/serde_avro_derive/Cargo.toml @@ -13,6 +13,8 @@ serde_avro_fast = { path = "../serde_avro_fast" } [dev-dependencies] + lazy_static = "1" pretty_assertions = "1" + regex = "1" serde_avro_derive = { path = "../serde_avro_derive" } serde_json = "1" diff --git a/serde_avro_derive/src/lib.rs b/serde_avro_derive/src/lib.rs index e12948b..963e7da 100644 --- a/serde_avro_derive/src/lib.rs +++ b/serde_avro_derive/src/lib.rs @@ -90,6 +90,19 @@ impl SchemaBuilder { } } } + + pub fn build_logical_type( + &mut self, + logical_type: LogicalType, + ) -> SchemaKey { + let reserved_schema_key = self.reserve(); + let new_node = SchemaNode::LogicalType { + logical_type, + inner: self.find_or_build::(), + }; + self.nodes[reserved_schema_key] = new_node; + 
SchemaKey::from_idx(reserved_schema_key) + } } macro_rules! impl_primitive { diff --git a/serde_avro_derive/tests/derive_schema.rs b/serde_avro_derive/tests/derive_schema.rs index 0f23dc5..a1087dd 100644 --- a/serde_avro_derive/tests/derive_schema.rs +++ b/serde_avro_derive/tests/derive_schema.rs @@ -3,9 +3,25 @@ use serde_avro_derive::BuildSchema; use pretty_assertions::assert_eq; fn test(expected: &str) { - let schema = serde_json::to_string_pretty(&T::schema_mut()).unwrap(); + let schema = clean_schema(&serde_json::to_string_pretty(&T::schema_mut()).unwrap()); println!("{schema}"); assert_eq!(schema, expected); + + // Round trip + let schema_mut: serde_avro_fast::schema::SchemaMut = schema.parse().unwrap(); + dbg!(&schema_mut); + let schema2 = clean_schema(&serde_json::to_string_pretty(&schema_mut).unwrap()); + assert_eq!(schema2, expected); + let _schema: serde_avro_fast::Schema = schema_mut.try_into().unwrap(); +} + +fn clean_schema(schema: &str) -> String { + lazy_static::lazy_static! 
{ + static ref REGEX: regex::Regex = regex::Regex::new(r#""(derive_schema\.[^_]+_)\w{16}""#).unwrap(); + } + REGEX + .replace_all(schema, r#""${1}TYPEIDHASH""#) + .into_owned() } #[derive(serde_avro_derive::Schema)] @@ -140,7 +156,7 @@ fn generics() { test::>( r#"{ "type": "record", - "name": "derive_schema.Generics_b8f49e32140be9d5", + "name": "derive_schema.Generics_TYPEIDHASH", "fields": [ { "name": "s1", @@ -198,3 +214,47 @@ fn lifetimes() { }"#, ); } + +#[derive(serde_avro_derive::Schema)] +#[allow(unused)] +struct LogicalTypes<'a> { + #[avro_schema(logical_type = Uuid)] + uuid: &'a str, + #[avro_schema(logical_type = Decimal, scale = 1, precision = 4)] + decimal: f64, + #[avro_schema(logical_type = CustomLogicalType)] + custom: &'a str, +} + +#[test] +fn logical_types() { + test::>( + r#"{ + "type": "record", + "name": "derive_schema.LogicalTypes", + "fields": [ + { + "name": "uuid", + "type": { + "logical_type": "uuid", + "type": "string" + } + }, + { + "name": "decimal", + "type": { + "logical_type": "decimal", + "type": "double" + } + }, + { + "name": "custom", + "type": { + "logical_type": "CustomLogicalType", + "type": "string" + } + } + ] +}"#, + ); +} diff --git a/serde_avro_derive_macros/src/schema.rs b/serde_avro_derive_macros/src/schema.rs index 214cc43..f280b8c 100644 --- a/serde_avro_derive_macros/src/schema.rs +++ b/serde_avro_derive_macros/src/schema.rs @@ -24,9 +24,14 @@ pub(crate) struct SchemaDeriveField { ty: syn::Type, skip: darling::util::Flag, + logical_type: Option, + scale: Option>, + precision: Option>, } pub(crate) fn schema_impl(input: SchemaDeriveInput) -> Result { + let mut errors = TokenStream::default(); + let mut fields = input .data .take_struct() @@ -67,6 +72,96 @@ pub(crate) fn schema_impl(input: SchemaDeriveInput) -> Result>(); + let field_instantiations = fields.iter().zip(&field_types).map(|(field, ty)| { + let mut logical_type_ident = field.logical_type.as_ref(); + if logical_type_ident.is_none() { + if let 
syn::Type::Path(path) = &field.ty { + if let Some(last_type_ident) = path.path.segments.last().map(|s| &s.ident) { + let last_type_str = last_type_ident.to_string(); + match last_type_str.as_str() { + "Uuid" => logical_type_ident = Some(last_type_ident), + _ => {} + } + } + } + } + match logical_type_ident { + None => quote! { builder.find_or_build::<#ty>() }, + Some(logical_type_ident) => { + let logical_type_str = logical_type_ident.to_string(); + let mut logical_type = if [ + "Decimal", + "Uuid", + "Date", + "TimeMillis", + "TimeMicros", + "TimestampMillis", + "TimestampMicros", + "Duration", + ] + .contains(&logical_type_str.as_str()) + { + // This is a known logical type + quote! { schema::LogicalType::#logical_type_ident } + } else { + quote! { schema::LogicalType::Unknown( + #logical_type_str.to_owned() + ) } + }; + if logical_type_str == "Decimal" { + let zero = parse_quote!(0); + let mut error = |missing_field: &str| { + errors.extend( + Error::new_spanned( + logical_type_ident, + format_args!( + "`Decimal` logical type requires \ + `{missing_field}` attribute to be set" + ), + ) + .to_compile_error(), + ); + &zero + }; + let scale = field + .scale + .as_ref() + .map_or_else(|| error("scale"), |w| &w.value); + let precision = field + .precision + .as_ref() + .map_or_else(|| error("precision"), |w| &w.value); + logical_type.extend(quote! { + (schema::Decimal::new(#scale, #precision)) + }); + } else { + let mut error = |field_that_should_not_be_here: &WithMetaPath| { + errors.extend( + Error::new_spanned( + &field_that_should_not_be_here.path, + format_args!( + "`{}` attribute is not relevant for `{}` logical type", + darling::util::path_to_string( + &field_that_should_not_be_here.path + ), + logical_type_str + ), + ) + .to_compile_error(), + ); + }; + if let Some(f) = &field.scale { + error(&f); + } + if let Some(f) = &field.precision { + error(&f); + } + } + quote! 
{ builder.build_logical_type::<#ty>(#logical_type) } + } + } + }); + let field_names = fields .iter() .map(|f| f.ident.as_ref().map(|i| i.to_string())) @@ -193,7 +288,7 @@ pub(crate) fn schema_impl(input: SchemaDeriveInput) -> Result(), + #field_instantiations, ), )*], ), @@ -205,6 +300,8 @@ pub(crate) fn schema_impl(input: SchemaDeriveInput) -> Result { + path: syn::Path, + value: T, +} +impl darling::FromMeta for WithMetaPath { + fn from_meta(meta: &syn::Meta) -> Result { + Ok(Self { + value: ::from_meta(meta)?, + path: meta.path().clone(), + }) + } +} From 0f0e8446a4f5eb8b0498e55ed791ed9c6e74c54b Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sun, 10 Mar 2024 13:40:11 +0100 Subject: [PATCH 13/15] Logical types test fix --- serde_avro_derive/tests/derive_schema.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/serde_avro_derive/tests/derive_schema.rs b/serde_avro_derive/tests/derive_schema.rs index a1087dd..c12c689 100644 --- a/serde_avro_derive/tests/derive_schema.rs +++ b/serde_avro_derive/tests/derive_schema.rs @@ -236,21 +236,23 @@ fn logical_types() { { "name": "uuid", "type": { - "logical_type": "uuid", + "logicalType": "uuid", "type": "string" } }, { "name": "decimal", "type": { - "logical_type": "decimal", - "type": "double" + "logicalType": "decimal", + "type": "double", + "scale": 1, + "precision": 4 } }, { "name": "custom", "type": { - "logical_type": "CustomLogicalType", + "logicalType": "CustomLogicalType", "type": "string" } } From 7826739ddd65d17626894d3ae16a50b4d4c0ce9c Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sun, 10 Mar 2024 13:42:03 +0100 Subject: [PATCH 14/15] minor fix in test --- serde_avro_derive/tests/derive_schema.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/serde_avro_derive/tests/derive_schema.rs b/serde_avro_derive/tests/derive_schema.rs index c12c689..50300dd 100644 --- a/serde_avro_derive/tests/derive_schema.rs +++ b/serde_avro_derive/tests/derive_schema.rs @@ 
-3,12 +3,13 @@ use serde_avro_derive::BuildSchema; use pretty_assertions::assert_eq; fn test(expected: &str) { - let schema = clean_schema(&serde_json::to_string_pretty(&T::schema_mut()).unwrap()); + let schema_raw = serde_json::to_string_pretty(&T::schema_mut()).unwrap(); + let schema = clean_schema(&schema_raw); println!("{schema}"); assert_eq!(schema, expected); // Round trip - let schema_mut: serde_avro_fast::schema::SchemaMut = schema.parse().unwrap(); + let schema_mut: serde_avro_fast::schema::SchemaMut = schema_raw.parse().unwrap(); dbg!(&schema_mut); let schema2 = clean_schema(&serde_json::to_string_pretty(&schema_mut).unwrap()); assert_eq!(schema2, expected); From 2351d08f0b3030ec68bafb18d98a4b13b3714cfc Mon Sep 17 00:00:00 2001 From: Thomas BESSOU Date: Sun, 10 Mar 2024 13:52:13 +0100 Subject: [PATCH 15/15] Add version numbers where required for publishing --- serde_avro_derive/Cargo.toml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/serde_avro_derive/Cargo.toml b/serde_avro_derive/Cargo.toml index 151fe85..50ed568 100644 --- a/serde_avro_derive/Cargo.toml +++ b/serde_avro_derive/Cargo.toml @@ -9,12 +9,11 @@ workspace = ".." [dependencies] - serde_avro_derive_macros = { path = "../serde_avro_derive_macros" } - serde_avro_fast = { path = "../serde_avro_fast" } + serde_avro_derive_macros = { path = "../serde_avro_derive_macros", version = "0.1" } + serde_avro_fast = { path = "../serde_avro_fast", version = "1.0.0-rc.4" } [dev-dependencies] lazy_static = "1" pretty_assertions = "1" regex = "1" - serde_avro_derive = { path = "../serde_avro_derive" } serde_json = "1"