Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add derive for Schema #13

Merged
merged 18 commits into from
Mar 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
/target
/Cargo.lock
Cargo.lock
58 changes: 7 additions & 51 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,51 +1,7 @@
[package]
authors = ["Thomas BESSOU <[email protected]>"]
description = "An idiomatic implementation of serde/avro (de)serialization"
edition = "2021"
license = "LGPL-3.0-only"
name = "serde_avro_fast"
repository = "https://github.com/Ten0/serde_avro_fast"
version = "1.0.0-rc.4"

[features]
default = ["deflate"]
deflate = ["flate2"]
snappy = ["snap", "crc32fast"]
xz = ["xz2"]
zstandard = ["zstd"]

[dependencies]
bzip2 = { version = "0.4", optional = true }
crc32fast = { version = "1", optional = true }
flate2 = { version = "1", optional = true }
integer-encoding = { default-features = false, version = "4" }
num-traits = "0.2"
rand = "0.8"
rust_decimal = { version = "1", default-features = false, features = ["std", "serde-with-str"] }
serde = "1"
serde-transcode = "1"
serde_derive = "1"
serde_json = "1"
serde_serializer_quick_unsupported = "0.1"
snap = { version = "1", optional = true }
thiserror = "1"
xz2 = { version = "0.1", optional = true }
zstd = { version = "0.13", optional = true }

[dev-dependencies]
anyhow = "1"
apache-avro = { version = "0.14", features = ["bzip", "snappy", "xz", "zstandard"] }
criterion = "0.5"
lazy_static = "1"
paste = "1"
pretty_assertions = "1"
serde-tuple-vec-map = "1"
serde_bytes = "0.11"

[[bench]]
harness = false
name = "single"

[[bench]]
harness = false
name = "object_container_file_encoding"
[workspace]
members = [
"serde_avro_derive",
"serde_avro_derive_macros",
"serde_avro_fast",
]
resolver = "2"
19 changes: 19 additions & 0 deletions serde_avro_derive/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
[package]
authors = ["Thomas BESSOU <[email protected]>"]
description = "Derive avro schema for Rust structs for serde_avro_fast"
edition = "2021"
license = "LGPL-3.0-only"
name = "serde_avro_derive"
repository = "https://github.com/Ten0/serde_avro_fast"
version = "0.1.0"
workspace = ".."

[dependencies]
serde_avro_derive_macros = { path = "../serde_avro_derive_macros", version = "0.1" }
serde_avro_fast = { path = "../serde_avro_fast", version = "1.0.0-rc.4" }

[dev-dependencies]
lazy_static = "1"
pretty_assertions = "1"
regex = "1"
serde_json = "1"
257 changes: 257 additions & 0 deletions serde_avro_derive/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
//! Bring automatic Avro Schema generation to [`serde_avro_fast`]
//!
//! See the [`#[derive(Schema)]`](derive@Schema) documentation for more
//! information

pub use serde_avro_fast;

pub use serde_avro_derive_macros::*;

use std::{any::TypeId, collections::HashMap};

use serde_avro_fast::schema::*;

/// A type for which an Avro schema can be built automatically (can be
/// `derive`d)
///
/// This trait can be derived using [`#[derive(Schema)]`](derive@Schema)
pub trait BuildSchema {
    /// Build a [`struct@Schema`] for this type
    ///
    /// # Errors
    ///
    /// Returns the [`SchemaError`] raised when the [`SchemaMut`] produced by
    /// [`Self::schema_mut`] cannot be converted into a [`struct@Schema`].
    fn schema() -> Result<Schema, SchemaError> {
        Self::schema_mut().try_into()
    }

    /// Build a [`SchemaMut`] for this type
    ///
    /// The node for `Self` is the first node handed to
    /// [`SchemaMut::from_nodes`]; its dependencies follow it.
    ///
    /// # Panics
    ///
    /// Panics if [`Self::append_schema`] does not insert any node (see
    /// [`SchemaBuilder::find_or_build`]).
    fn schema_mut() -> SchemaMut {
        let mut builder = SchemaBuilder::default();
        // Go through `find_or_build` rather than calling `append_schema`
        // directly: this registers `Self::TypeLookup` before the node is
        // built, so a type that (transitively) refers to itself resolves to
        // this root node instead of being built a second time.
        let _root_key = builder.find_or_build::<Self>();
        SchemaMut::from_nodes(builder.nodes)
    }

    /// Largely internal method to build the schema. Registers the schema within
    /// the builder.
    ///
    /// This does not check if this type already exists in the builder, so it
    /// should never be called directly (instead, use
    /// [`SchemaBuilder::find_or_build`])
    ///
    /// The [`SchemaNode`] for this type should be put at the current end of the
    /// `nodes` array, and its non-already-built dependencies should be put
    /// after in the array.
    fn append_schema(builder: &mut SchemaBuilder);

    /// Largely internal type used by [`#[derive(Schema)]`](derive@Schema)
    ///
    /// The TypeId of this type will be used to lookup whether the
    /// [`SchemaNode`] for this type has already been built in the
    /// [`SchemaBuilder`].
    ///
    /// This indirection is required to allow non-static types to implement
    /// [`BuildSchema`], and also enables using the same node for types that we
    /// know map to the same schema.
    type TypeLookup: std::any::Any;
}

/// Largely internal type used by [`#[derive(Schema)]`](derive@Schema)
///
/// Accumulates the [`SchemaNode`]s of a schema under construction, together
/// with the bookkeeping needed to reuse a node when the same type is
/// encountered more than once.
///
/// You should typically not use this directly
#[derive(Default)]
pub struct SchemaBuilder {
/// Nodes built so far. Nodes refer to each other through [`SchemaKey`]s
/// created from their index in this array.
pub nodes: Vec<SchemaNode>,
/// For every type already registered, maps the `TypeId` of its
/// `BuildSchema::TypeLookup` to the key of its node in `nodes`.
pub already_built_types: HashMap<TypeId, SchemaKey>,
// Private field: the struct cannot be built with a struct literal from
// outside this module, only through `Default`.
_private: (),
}

impl SchemaBuilder {
    /// Reserve a slot in the `nodes` array
    ///
    /// A placeholder `Null` node is pushed and its index returned. Once the
    /// real `SchemaNode` has been built, it should be stored at that index
    /// in `nodes`.
    pub fn reserve(&mut self) -> usize {
        let slot = self.nodes.len();
        let placeholder = SchemaNode::RegularType(RegularType::Null);
        self.nodes.push(placeholder);
        slot
    }

    /// Get the key of the node for `T`, building it if it is not known yet
    ///
    /// Lookup is keyed on the `TypeId` of `T::TypeLookup`. When the type is
    /// not registered yet, its key (the current end of `nodes`) is recorded
    /// *before* `T::append_schema` runs, so nested lookups of the same type
    /// made while it is being built return that key.
    ///
    /// # Panics
    ///
    /// Panics if `T::append_schema` did not insert any node.
    pub fn find_or_build<T: BuildSchema + ?Sized>(&mut self) -> SchemaKey {
        use std::collections::hash_map::Entry;

        let lookup_id = TypeId::of::<T::TypeLookup>();
        let vacant_slot = match self.already_built_types.entry(lookup_id) {
            Entry::Occupied(known) => return *known.get(),
            Entry::Vacant(vacant) => vacant,
        };

        let key = SchemaKey::from_idx(self.nodes.len());
        vacant_slot.insert(key);
        T::append_schema(self);
        assert!(
            self.nodes.len() > key.idx(),
            "append_schema should always insert at least a node \
             (and its dependencies below itself)"
        );
        key
    }

    /// Build a `SchemaNode::LogicalType` wrapping the node for `T`
    ///
    /// The logical type node is placed in a freshly reserved slot, ahead of
    /// any node that building `T` may append. Returns the key of the logical
    /// type node.
    pub fn build_logical_type<T: BuildSchema + ?Sized>(
        &mut self,
        logical_type: LogicalType,
    ) -> SchemaKey {
        let slot = self.reserve();
        let inner = self.find_or_build::<T>();
        self.nodes[slot] = SchemaNode::LogicalType {
            logical_type,
            inner,
        };
        SchemaKey::from_idx(slot)
    }
}

// Implements `BuildSchema` for Rust types that are represented by a single
// primitive node: each `type, Variant;` pair yields an impl that pushes
// `RegularType::Variant` and uses the type itself as lookup key.
macro_rules! impl_primitive {
    ($($rust_ty:ty, $avro_variant:ident;)+) => {
        $(
            impl BuildSchema for $rust_ty {
                type TypeLookup = Self;

                fn append_schema(builder: &mut SchemaBuilder) {
                    let node = SchemaNode::RegularType(RegularType::$avro_variant);
                    builder.nodes.push(node);
                }
            }
        )+
    };
}
impl_primitive! {
    (), Null;
    bool, Boolean;
    i32, Int;
    i64, Long;
    f32, Float;
    f64, Double;
    String, String;
    Vec<u8>, Bytes;
}

// Implements `BuildSchema` for a type by delegating entirely to another
// type: each `from, to;` pair reuses both the node construction and the
// lookup key of `to`, so both types share a single node in a builder.
macro_rules! impl_forward {
    ($($from_ty:ty, $target_ty:ty;)+) => {
        $(
            impl BuildSchema for $from_ty {
                type TypeLookup = <$target_ty as BuildSchema>::TypeLookup;

                fn append_schema(builder: &mut SchemaBuilder) {
                    <$target_ty as BuildSchema>::append_schema(builder)
                }
            }
        )+
    };
}
impl_forward! {
    str, String;
    [u8], Vec<u8>;
    u16, i32;
    u32, i64;
    u64, i64;
    i8, i32;
    i16, i32;
    usize, i64;
}

// Implements `BuildSchema` for single-parameter wrapper types: the wrapper
// is transparent as far as the schema is concerned, so it reuses the node
// construction and the lookup key of the wrapped type.
macro_rules! impl_ptr {
    ($($($wrapper_path:ident)::+,)+) => {
        $(
            impl<T: BuildSchema + ?Sized> BuildSchema for $($wrapper_path)::+<T> {
                type TypeLookup = T::TypeLookup;

                fn append_schema(builder: &mut SchemaBuilder) {
                    <T as BuildSchema>::append_schema(builder)
                }
            }
        )+
    };
}
impl_ptr! {
    Box,
    std::sync::Arc,
    std::rc::Rc,
    std::cell::RefCell,
    std::cell::Cell,
}
/// A shared reference has the same schema as the value it points to, and
/// shares its node in the builder.
impl<T: BuildSchema + ?Sized> BuildSchema for &'_ T {
    type TypeLookup = T::TypeLookup;

    fn append_schema(builder: &mut SchemaBuilder) {
        T::append_schema(builder)
    }
}
/// A mutable reference has the same schema as the value it points to, and
/// shares its node in the builder.
impl<T: BuildSchema + ?Sized> BuildSchema for &'_ mut T {
    type TypeLookup = T::TypeLookup;

    fn append_schema(builder: &mut SchemaBuilder) {
        T::append_schema(builder)
    }
}

/// A `Vec<T>` is described by an `Array` node whose items are the node for
/// `T`.
impl<T: BuildSchema> BuildSchema for Vec<T> {
    type TypeLookup = Vec<T::TypeLookup>;

    fn append_schema(builder: &mut SchemaBuilder) {
        // Reserve our own slot first so that the array node comes before the
        // nodes that building `T` may append.
        let slot = builder.reserve();
        let items = builder.find_or_build::<T>();
        builder.nodes[slot] = SchemaNode::RegularType(RegularType::Array(Array::new(items)));
    }
}

/// A slice is described exactly like the corresponding `Vec<T>`, and shares
/// its lookup key.
impl<T: BuildSchema> BuildSchema for [T] {
    type TypeLookup = <Vec<T> as BuildSchema>::TypeLookup;

    fn append_schema(builder: &mut SchemaBuilder) {
        <Vec<T> as BuildSchema>::append_schema(builder)
    }
}

/// An `Option<T>` is described by a `Union` node with two variants, in this
/// order: the node for `()` and the node for `T`.
impl<T: BuildSchema> BuildSchema for Option<T> {
    type TypeLookup = Option<T::TypeLookup>;

    fn append_schema(builder: &mut SchemaBuilder) {
        // Reserve our own slot first so that the union node comes before the
        // nodes that building its variants may append.
        let slot = builder.reserve();
        let none_variant = builder.find_or_build::<()>();
        let some_variant = builder.find_or_build::<T>();
        let union = Union::new(vec![none_variant, some_variant]);
        builder.nodes[slot] = SchemaNode::RegularType(RegularType::Union(union));
    }
}

/// A byte array of length `N` is described by a `Fixed` node of size `N`,
/// named `u8_array_<N>`.
impl<const N: usize> BuildSchema for [u8; N] {
    type TypeLookup = Self;

    fn append_schema(builder: &mut SchemaBuilder) {
        let name = Name::from_fully_qualified_name(format!("u8_array_{}", N));
        let fixed = Fixed::new(name, N);
        builder
            .nodes
            .push(SchemaNode::RegularType(RegularType::Fixed(fixed)));
    }
}

/// A `HashMap` with string-like keys is described by a `Map` node whose
/// values are the node for `V`.
///
/// The lookup key ignores the concrete key type `S`: every such map with the
/// same value lookup type shares one node.
impl<S: std::ops::Deref<Target = str>, V: BuildSchema> BuildSchema for HashMap<S, V> {
    type TypeLookup = HashMap<String, V::TypeLookup>;

    fn append_schema(builder: &mut SchemaBuilder) {
        // Reserve our own slot first so that the map node comes before the
        // nodes that building `V` may append.
        let slot = builder.reserve();
        let values = builder.find_or_build::<V>();
        builder.nodes[slot] = SchemaNode::RegularType(RegularType::Map(Map::new(values)));
    }
}
/// A `BTreeMap` with string-like keys is described exactly like a
/// `HashMap<String, V>`, and shares its lookup key.
impl<S: std::ops::Deref<Target = str>, V: BuildSchema> BuildSchema
    for std::collections::BTreeMap<S, V>
{
    type TypeLookup = <HashMap<String, V> as BuildSchema>::TypeLookup;

    fn append_schema(builder: &mut SchemaBuilder) {
        <HashMap<String, V> as BuildSchema>::append_schema(builder)
    }
}

/// Appends `_` followed by a 16-digit, zero-padded, lowercase hexadecimal
/// hash of `type_id` to `struct_name`.
#[doc(hidden)]
pub fn hash_type_id(struct_name: &mut String, type_id: TypeId) {
    use std::fmt::Write as _;
    use std::hash::{Hash as _, Hasher as _};

    #[allow(deprecated)] // I actually want to not change hasher
    let digest = {
        let mut sip = std::hash::SipHasher::new();
        type_id.hash(&mut sip);
        sip.finish()
    };
    write!(struct_name, "_{:016x?}", digest).unwrap();
}
Loading
Loading