From 70885fcc0f9553b0e88865637a4e07dabf4386b8 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Tue, 10 Oct 2023 15:09:38 +0200 Subject: [PATCH 1/9] Fix prefix decoding Signed-off-by: Heinz N. Gies --- .../kafka_schema_registry_prefix.rs | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/preprocessor/kafka_schema_registry_prefix.rs b/src/preprocessor/kafka_schema_registry_prefix.rs index 906c50f30f..5ef766a921 100644 --- a/src/preprocessor/kafka_schema_registry_prefix.rs +++ b/src/preprocessor/kafka_schema_registry_prefix.rs @@ -32,20 +32,19 @@ impl Preprocessor for SchemaRegistryPrefix { mut meta: Value<'static>, ) -> Result, Value<'static>)>> { use std::io::Cursor; - if let Some(d) = data.get(8..) { + if let Some(d) = data.get(5..) { let mut c = Cursor::new(data); - let magic = c.read_u32::()?; + let magic = c.read_u8()?; if magic != 0 { - return Err(format!( - "Invalid magic bytes (0x00000000) for kafka wire format: {magic}" - ) - .into()); + return Err( + format!("Invalid magic bytes (0x00) for kafka wire format: {magic}").into(), + ); } let schema = c.read_u32::()?; meta.insert("schema_id", schema)?; Ok(vec![(d.to_vec(), meta)]) } else { - Err("Kafka schema registry Preprocessor: < 8 byte".into()) + Err("Kafka schema registry Preprocessor: < 5 byte".into()) } } } @@ -55,12 +54,12 @@ mod test { use super::*; use value_trait::ValueAccess; - /// Tests if the preprocessor errors on data that's less then 8 bytes + /// Tests if the preprocessor errors on data that's less then 5 bytes #[test] - fn test_preprocessor_less_then_8_bytes() { + fn test_preprocessor_less_then_5_bytes() { let mut pp = SchemaRegistryPrefix::default(); let mut ingest_ns = 0; - let data = vec![0, 0, 0, 0, 0, 0, 0]; + let data = vec![0, 0, 0, 0]; let meta = Value::object(); let res = pp.process(&mut ingest_ns, &data, meta); assert!(res.is_err()); @@ -71,7 +70,7 @@ mod test { fn test_preprocessor_schema_id() -> Result<()> { let mut pp = 
SchemaRegistryPrefix::default(); let mut ingest_ns = 0; - let data = vec![0, 0, 0, 0, 0, 0, 0, 1, 42]; + let data = vec![0, 0, 0, 0, 1, 42]; let meta = Value::object(); let mut res = pp.process(&mut ingest_ns, &data, meta)?; let (rest, meta) = res.pop().expect("no result"); @@ -85,7 +84,7 @@ mod test { fn test_preprocessor_invalid_magic_bytes() { let mut pp = SchemaRegistryPrefix::default(); let mut ingest_ns = 0; - let data = vec![0, 0, 0, 1, 0, 0, 0, 1]; + let data = vec![1, 0, 0, 0, 1]; let meta = Value::object(); let res = pp.process(&mut ingest_ns, &data, meta); assert!(res.is_err()); From e258c76406a828e43acfab106c70b1305e7bad34 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Wed, 11 Oct 2023 17:35:28 +0200 Subject: [PATCH 2/9] save Signed-off-by: Heinz N. Gies --- Cargo.lock | 336 ++++++++++++++- .../impls/gcs/resumable_upload_client.rs | 6 +- src/connectors/impls/gpubsub/producer.rs | 2 +- src/connectors/impls/otel/common.rs | 3 +- src/connectors/prelude.rs | 11 +- src/connectors/utils.rs | 2 - src/connectors/utils/socket.rs | 2 +- tremor-codec/Cargo.toml | 7 + tremor-codec/src/codec/avro.rs | 402 +++++++++++------- .../src/codec/kafka_schema_registry.rs | 150 +++++++ tremor-codec/src/errors.rs | 2 + tremor-codec/src/lib.rs | 2 + tremor-common/Cargo.toml | 3 + tremor-common/src/lib.rs | 3 + .../utils => tremor-common/src}/url.rs | 53 ++- 15 files changed, 774 insertions(+), 210 deletions(-) create mode 100644 tremor-codec/src/codec/kafka_schema_registry.rs rename {src/connectors/utils => tremor-common/src}/url.rs (84%) diff --git a/Cargo.lock b/Cargo.lock index cfdbde18fb..de3a25e091 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,17 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "addr" +version = "0.11.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "936697e9caf938eb2905036100edf8e1269da8291f8a02f5fe7b37073784eec0" +dependencies = [ + "no-std-net", + "psl", + "psl-types", +] + [[package]] name = "addr2line" version = "0.21.0" @@ -213,7 +224,7 @@ dependencies = [ "strum_macros", "thiserror", "typed-builder", - "uuid", + "uuid 1.4.1", "xz2", "zstd 0.12.4", ] @@ -365,6 +376,17 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "async-recursion" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", +] + [[package]] name = "async-signal" version = "0.2.3" @@ -627,7 +649,7 @@ dependencies = [ "http", "percent-encoding", "tracing", - "uuid", + "uuid 1.4.1", ] [[package]] @@ -1210,7 +1232,7 @@ checksum = "f1369bc6b9e9a7dfdae2055f6ec151fe9c554a9d23d357c0237cee2e25eaabb7" dependencies = [ "chrono", "chrono-tz-build", - "phf", + "phf 0.11.2", ] [[package]] @@ -1220,8 +1242,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2f5ebdc942f57ed96d560a6d1a459bae5851102a25d5bf89dc04ae453e31ecf" dependencies = [ "parse-zoneinfo", - "phf", - "phf_codegen", + "phf 0.11.2", + "phf_codegen 0.11.2", ] [[package]] @@ -1356,7 +1378,7 @@ dependencies = [ "thiserror", "tokio", "url", - "uuid", + "uuid 1.4.1", ] [[package]] @@ -1916,6 +1938,12 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d9d8664cf849d7d0f3114a3a387d2f5e4303176d746d5a951aaddc66dfe9240" +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + [[package]] name = "downcast-rs" version = "1.2.0" @@ -2853,6 +2881,12 @@ dependencies = [ 
"cfg-if", ] +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + [[package]] name = "io-lifetimes" version = "1.0.11" @@ -2941,6 +2975,25 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "json-pointer" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fe841b94e719a482213cee19dd04927cf412f26d8dc84c5a446c081e49c2997" +dependencies = [ + "serde_json", +] + +[[package]] +name = "jsonway" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "effcb749443c905fbaef49d214f8b1049c240e0adb7af9baa0e201e625e4f9de" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "jsonwebtoken" version = "7.2.0" @@ -2990,7 +3043,7 @@ dependencies = [ "petgraph", "pico-args", "regex", - "regex-syntax", + "regex-syntax 0.7.5", "string_cache", "term", "tiny-keccak", @@ -3216,6 +3269,29 @@ dependencies = [ "winapi", ] +[[package]] +name = "logos" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf8b031682c67a8e3d5446840f9573eb7fe26efe7ec8d195c9ac4c0647c502f1" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-derive" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d849148dbaf9661a6151d1ca82b13bb4c4c128146a88d05253b38d4e2f496c" +dependencies = [ + "beef", + "fnv", + "proc-macro2", + "quote", + "regex-syntax 0.6.29", + "syn 1.0.109", +] + [[package]] name = "lru" version = "0.11.1" @@ -3397,6 +3473,15 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" +[[package]] +name = "no-std-net" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1bcece43b12349917e096cddfa66107277f123e6c96a5aea78711dc601a47152" +dependencies = [ + "serde", +] + [[package]] name = "nom" version = "5.1.3" @@ -3732,6 +3817,51 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" +[[package]] +name = "pest" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c022f1e7b65d6a24c0dbbd5fb344c66881bc01f3e5ae74a1c8100f2f985d98a4" +dependencies = [ + "memchr", + "thiserror", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35513f630d46400a977c4cb58f78e1bfbe01434316e60c37d27b9ad6139c66d8" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc9fc1b9e7057baba189b5c626e2d6f40681ae5b6eb064dc7c7834101ec8123a" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn 2.0.38", +] + +[[package]] +name = "pest_meta" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1df74e9e7ec4053ceb980e7c0c8bd3594e977fde1af91daba9c928e8e8c6708d" +dependencies = [ + "once_cell", + "pest", + "sha2 0.10.8", +] + [[package]] name = "petgraph" version = "0.6.4" @@ -3742,6 +3872,15 @@ dependencies = [ "indexmap 2.0.2", ] +[[package]] +name = "phf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" +dependencies = [ + "phf_shared 0.8.0", +] + [[package]] name = "phf" version = "0.11.2" @@ -3751,16 +3890,36 @@ dependencies = [ "phf_shared 0.11.2", ] +[[package]] +name = "phf_codegen" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", +] + [[package]] name = "phf_codegen" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" dependencies = [ - "phf_generator", + "phf_generator 0.11.2", "phf_shared 0.11.2", ] +[[package]] +name = "phf_generator" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" +dependencies = [ + "phf_shared 0.8.0", + "rand 0.7.3", +] + [[package]] name = "phf_generator" version = "0.11.2" @@ -3771,6 +3930,15 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "phf_shared" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" +dependencies = [ + "siphasher", +] + [[package]] name = "phf_shared" version = "0.10.0" @@ -3996,7 +4164,7 @@ dependencies = [ "rand 0.8.5", "rand_chacha 0.3.1", "rand_xorshift", - "regex-syntax", + "regex-syntax 0.7.5", "rusty-fork", "tempfile", "unarray", @@ -4061,6 +4229,33 @@ version = "2.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" +[[package]] +name = "protofish" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2a5fc771504e21bfc00513bfdb1f8d2c183bdb58a50c8ec31db946daa5a3257" +dependencies = [ + "bytes", + "pest", + "pest_derive", + "snafu", +] + +[[package]] +name = "psl" +version = "2.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1be0afcd844b15cfce18bf8cccf2dfa887a00a6454a9ea135f122b948cee91" +dependencies = [ + "psl-types", +] + +[[package]] +name = "psl-types" +version = "2.0.11" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "33cb294fe86a74cbcf50d4445b37da762029549ebeea341421c7c70370f86cac" + [[package]] name = "quad-rand" version = "0.2.1" @@ -4102,6 +4297,7 @@ dependencies = [ "rand_chacha 0.2.2", "rand_core 0.5.1", "rand_hc", + "rand_pcg", ] [[package]] @@ -4162,6 +4358,15 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "rand_pcg" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" +dependencies = [ + "rand_core 0.5.1", +] + [[package]] name = "rand_xorshift" version = "0.3.0" @@ -4266,7 +4471,7 @@ dependencies = [ "aho-corasick", "memchr", "regex-automata", - "regex-syntax", + "regex-syntax 0.7.5", ] [[package]] @@ -4277,14 +4482,20 @@ checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.7.5", ] [[package]] name = "regex-lite" -version = "0.1.0" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a6ebcd15653947e6140f59a9811a06ed061d18a5c35dfca2e2e4c5525696878" + +[[package]] +name = "regex-syntax" +version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f96ede7f386ba6e910092e7ccdc04176cface62abebea07ed6b46d870ed95ca2" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" @@ -4430,7 +4641,7 @@ dependencies = [ "serde_json", "tch", "thiserror", - "uuid", + "uuid 1.4.1", ] [[package]] @@ -4637,6 +4848,26 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "schema_registry_converter" +version = "3.1.0" +dependencies = [ + "apache-avro", + "byteorder", + "bytes", + "dashmap", + "futures", + "integer-encoding", + "logos", + "protofish", + "reqwest", + "serde", + "serde_json", + "tokio", + "url", + "valico", +] + [[package]] name = 
"scopeguard" version = "1.2.0" @@ -5104,6 +5335,27 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" +[[package]] +name = "snafu" +version = "0.6.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eab12d3c261b2308b0d80c26fffb58d17eba81a4be97890101f416b478c79ca7" +dependencies = [ + "doc-comment", + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.6.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1508efa03c362e23817f96cde18abed596a25219a8b2c66e8db33c03543d315b" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "snap" version = "1.1.0" @@ -6056,6 +6308,7 @@ name = "tremor-codec" version = "0.13.0-rc.16" dependencies = [ "apache-avro", + "async-recursion", "async-trait", "beef", "byteorder", @@ -6070,6 +6323,7 @@ dependencies = [ "reqwest", "rmp-serde", "ryu", + "schema_registry_converter", "serde", "serde_yaml 0.9.25", "simd-json", @@ -6078,9 +6332,10 @@ dependencies = [ "syslog_loose", "test-case", "tokio", + "tremor-common", "tremor-influx", "tremor-value", - "uuid", + "uuid 1.4.1", "value-trait", ] @@ -6090,10 +6345,13 @@ version = "0.13.0-rc.16" dependencies = [ "base64 0.21.4", "beef", + "lazy_static", "rand 0.8.5", + "regex", "serde", "simd-json", "simd-json-derive", + "test-case", "tokio", "url", ] @@ -6267,7 +6525,7 @@ dependencies = [ "tremor-value", "trust-dns-resolver", "url", - "uuid", + "uuid 1.4.1", "value-trait", "xz2", "zstd 0.12.4", @@ -6477,6 +6735,12 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" +[[package]] +name = "ucd-trie" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" + 
[[package]] name = "unarray" version = "0.1.4" @@ -6577,6 +6841,15 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "uritemplate-next" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcde98d1fc3f528255b1ecb22fb688ee0d23deb672a8c57127df10b98b4bd18c" +dependencies = [ + "regex", +] + [[package]] name = "url" version = "2.4.1" @@ -6607,6 +6880,15 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "uuid" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" +dependencies = [ + "getrandom 0.2.10", +] + [[package]] name = "uuid" version = "1.4.1" @@ -6617,6 +6899,28 @@ dependencies = [ "serde", ] +[[package]] +name = "valico" +version = "3.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "647856408e327686b6640397d19f60fac3e64c3bfaa6afc409da63ef7da45edb" +dependencies = [ + "addr", + "base64 0.13.1", + "chrono", + "json-pointer", + "jsonway", + "percent-encoding", + "phf 0.8.0", + "phf_codegen 0.8.0", + "regex", + "serde", + "serde_json", + "uritemplate-next", + "url", + "uuid 0.8.2", +] + [[package]] name = "value-bag" version = "1.4.1" diff --git a/src/connectors/impls/gcs/resumable_upload_client.rs b/src/connectors/impls/gcs/resumable_upload_client.rs index 3ffd0cdf6c..3e5cf0f9d8 100644 --- a/src/connectors/impls/gcs/resumable_upload_client.rs +++ b/src/connectors/impls/gcs/resumable_upload_client.rs @@ -16,10 +16,7 @@ use crate::{ connectors::{ google::TokenSrc, prelude::{Result, Url}, - utils::{ - object_storage::{BufferPart, ObjectId}, - url::HttpsDefaults, - }, + utils::object_storage::{BufferPart, ObjectId}, 
}, errors::err_gcs, }; @@ -30,6 +27,7 @@ use hyper::{header, Body, Method, Request, Response, StatusCode}; use hyper_rustls::HttpsConnectorBuilder; use std::time::Duration; use tokio::time::sleep; +use tremor_common::url::HttpsDefaults; pub(crate) type GcsHttpClient = hyper::Client>; diff --git a/src/connectors/impls/gpubsub/producer.rs b/src/connectors/impls/gpubsub/producer.rs index 4033d4e79a..cce134f60f 100644 --- a/src/connectors/impls/gpubsub/producer.rs +++ b/src/connectors/impls/gpubsub/producer.rs @@ -18,7 +18,6 @@ use crate::connectors::prelude::{ SinkManagerBuilder, SinkReply, Url, }; use crate::connectors::sink::Sink; -use crate::connectors::utils::url::HttpsDefaults; use crate::connectors::{ CodecReq, Connector, ConnectorBuilder, ConnectorConfig, ConnectorContext, ConnectorType, Context, @@ -33,6 +32,7 @@ use tokio::time::timeout; use tonic::codegen::InterceptedService; use tonic::transport::{Certificate, Channel, ClientTlsConfig}; use tonic::Code; +use tremor_common::url::HttpsDefaults; use tremor_pipeline::{ConfigImpl, Event}; use tremor_value::Value; use value_trait::ValueAccess; diff --git a/src/connectors/impls/otel/common.rs b/src/connectors/impls/otel/common.rs index d93262eb47..beb0b47011 100644 --- a/src/connectors/impls/otel/common.rs +++ b/src/connectors/impls/otel/common.rs @@ -14,9 +14,10 @@ #![allow(dead_code)] -use crate::connectors::utils::{pb, url}; +use crate::connectors::utils::pb; use crate::errors::Result; use simd_json::Builder; +use tremor_common::url; use tremor_otelapis::opentelemetry::proto::common::v1::{ any_value, AnyValue, ArrayValue, InstrumentationLibrary, KeyValue, KeyValueList, StringKeyValue, }; diff --git a/src/connectors/prelude.rs b/src/connectors/prelude.rs index 245a864659..ff975ebc5f 100644 --- a/src/connectors/prelude.rs +++ b/src/connectors/prelude.rs @@ -26,10 +26,7 @@ pub(crate) use crate::{ SourceManagerBuilder, SourceReply, StreamReader, }, spawn_task, - utils::{ - reconnect::Attempt, - url::{Defaults, 
HttpsDefaults, Url}, - }, + utils::reconnect::Attempt, Alias, CodecReq, Connector, ConnectorBuilder, ConnectorContext, ConnectorType, Context, StreamDone, StreamIdGen, ACCEPT_TIMEOUT, }, @@ -39,8 +36,12 @@ pub(crate) use crate::{ utils::hostname, Event, }; + pub(crate) use std::sync::atomic::Ordering; -pub(crate) use tremor_common::ports::{Port, ERR, IN, OUT}; +pub(crate) use tremor_common::{ + ports::{Port, ERR, IN, OUT}, + url::{Defaults, HttpsDefaults, Url}, +}; pub use tremor_pipeline::{ CbAction, ConfigImpl, EventIdGenerator, EventOriginUri, DEFAULT_STREAM_ID, }; diff --git a/src/connectors/utils.rs b/src/connectors/utils.rs index 9e9264b3d2..6eb230fb3e 100644 --- a/src/connectors/utils.rs +++ b/src/connectors/utils.rs @@ -31,8 +31,6 @@ pub(crate) mod reconnect; pub(crate) mod socket; /// Transport Level Security facilities pub(crate) mod tls; -/// URL untils -pub(crate) mod url; #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub(crate) struct ConnectionMeta { pub(crate) host: String, diff --git a/src/connectors/utils/socket.rs b/src/connectors/utils/socket.rs index d7a7d1646f..e5701a13d6 100644 --- a/src/connectors/utils/socket.rs +++ b/src/connectors/utils/socket.rs @@ -13,11 +13,11 @@ // limitations under the License. 
use crate::connectors::prelude::*; -use crate::connectors::utils::url::{Defaults, Url}; use crate::errors::{Error, Result}; use socket2::{Domain, Protocol, SockAddr, Socket, Type}; use tokio::net::{lookup_host, UdpSocket}; use tokio::net::{TcpListener, TcpStream}; +use tremor_common::url::{Defaults, Url}; #[derive(Debug, Clone, Deserialize)] #[serde(deny_unknown_fields, rename_all = "UPPERCASE")] diff --git a/tremor-codec/Cargo.toml b/tremor-codec/Cargo.toml index 909e922a59..b45232353e 100644 --- a/tremor-codec/Cargo.toml +++ b/tremor-codec/Cargo.toml @@ -12,6 +12,7 @@ version = "0.13.0-rc.16" [dependencies] tremor-value = { version = "0.13.0-rc.16", path = "../tremor-value" } +tremor-common = { version = "0.13.0-rc.16", path = "../tremor-common" } tokio = { version = "1.32", features = ["full"] } async-trait = "0.1" error-chain = "0.12" @@ -29,8 +30,13 @@ simdutf8 = "0.1" chrono = "0.4" uuid = { version = "1.3", features = ["v4"] } matches = "0.1" +async-recursion = "1" # codecs +schema_registry_converter = { version = "3", features = [ + "avro", + "easy", +], path = "../../schema_registry_converter" } reqwest = { version = "0.11", default-features = false, features = [ "rustls-tls", "rustls-tls-native-roots", @@ -45,6 +51,7 @@ apache-avro = { version = "0.16", features = [ "zstandard", "bzip2", ] } +# git = "https://github.com/gklijs/schema_registry_converter.git", rev = "72d89d5" # we need this for v0.16 of avro versioning serde = "1" rmp-serde = "1.1" syslog_loose = "0.19" diff --git a/tremor-codec/src/codec/avro.rs b/tremor-codec/src/codec/avro.rs index 871c02da02..fbdab2154c 100644 --- a/tremor-codec/src/codec/avro.rs +++ b/tremor-codec/src/codec/avro.rs @@ -19,6 +19,7 @@ //! ## Mappings //! //! | avro | tremor (to) | tremor (from) | +//! |------|-------------|---------------| //! | null | null | null | //! | boolean | bool | bool | //! | int | i64 | i64, u64| @@ -41,13 +42,14 @@ //! | timestamp-micros | i64 | i64, u64 | //! 
| duration | bytes[12] | bytes[12] | -use std::collections::HashMap; +use std::{collections::HashMap, sync::Arc}; use crate::prelude::*; use apache_avro::{ schema::Name, types::Value as AvroValue, Codec as Compression, Decimal, Duration, Reader, Schema, Writer, }; +use schema_registry_converter::avro_common::AvroSchema; use serde::Deserialize; use value_trait::TryTypeError; @@ -64,9 +66,6 @@ impl AvroRegistry { fn get_schema_by_id(&self, id: u32) -> Option<&Schema> { self.by_id.get(&id) } - fn get_schema_by_name(&self, name: &Name) -> Option<&Schema> { - self.by_name.get(name) - } async fn maybe_fetch_id(&mut self, id: u32) -> Result<()> { if self.by_id.contains_key(&id) { @@ -129,170 +128,215 @@ impl Avro { None => Err("Missing avro schema".into()), } } - #[allow(clippy::too_many_lines)] - fn to_avro_value(&self, data: &Value, schema: &Schema) -> Result { - Ok(match schema { - Schema::Null => { - let got = data.value_type(); - if got == ValueType::Null { - AvroValue::Null - } else { - return Err(TryTypeError { - expected: ValueType::Null, - got, - } - .into()); + + async fn write_value<'a, 'v>( + &self, + data: &'a Value<'v>, + writer: &mut Writer<'a, Vec>, + ) -> Result<()> { + let v = value_to_avro(data, writer.schema(), &self.registry).await?; + writer.append(v)?; + + Ok(()) + } +} + +pub(crate) enum SchemaWrapper<'a> { + Schema(Arc), + Ref(&'a Schema), +} + +impl<'a> SchemaWrapper<'a> { + fn schema(&self) -> &Schema { + match self { + SchemaWrapper::Schema(s) => &s.parsed, + SchemaWrapper::Ref(s) => s, + } + } +} +#[async_trait::async_trait] +pub(crate) trait SchemaResover { + async fn by_name(&self, name: &Name) -> Option; +} + +#[async_trait::async_trait] +impl SchemaResover for AvroRegistry { + async fn by_name(&self, name: &Name) -> Option { + self.by_name.get(name).map(SchemaWrapper::Ref) + } +} + +#[allow(clippy::too_many_lines)] +#[async_recursion::async_recursion] +pub(crate) async fn value_to_avro<'v, R>( + data: &Value<'v>, + schema: &Schema, + 
resolver: &R, +) -> Result +where + R: SchemaResover + Sync, +{ + Ok(match schema { + Schema::Null => { + let got = data.value_type(); + if got == ValueType::Null { + AvroValue::Null + } else { + return Err(TryTypeError { + expected: ValueType::Null, + got, } + .into()); } - Schema::Boolean => AvroValue::Boolean(data.try_as_bool()?), - Schema::Int => AvroValue::Int(data.try_as_i32()?), - Schema::Long => AvroValue::Long(data.try_as_i64()?), - Schema::Float => AvroValue::Float(data.try_as_f32()?), - Schema::Double => AvroValue::Double(data.try_as_f64()?), - Schema::Bytes => AvroValue::Bytes(data.try_as_bytes()?.to_vec()), - Schema::String => AvroValue::String(data.try_as_str()?.to_string()), - Schema::Array(s) => AvroValue::Array( - data.try_as_array()? - .iter() - .map(|d| self.to_avro_value(d, s)) - .collect::>()?, - ), - Schema::Map(s) => AvroValue::Map( - data.try_as_object()? - .iter() - .map(|(k, v)| Ok((k.to_string(), self.to_avro_value(v, s)?))) - .collect::>()?, - ), - Schema::Union(s) => { - for (i, variant) in s.variants().iter().enumerate() { - if let Ok(v) = self.to_avro_value(data, variant) { - return Ok(AvroValue::Union(u32::try_from(i)?, Box::new(v))); - } - } - return Err(format!("No variant matched for {}", data.value_type()).into()); + } + Schema::Boolean => AvroValue::Boolean(data.try_as_bool()?), + Schema::Int => AvroValue::Int(data.try_as_i32()?), + Schema::Long => AvroValue::Long(data.try_as_i64()?), + Schema::Float => AvroValue::Float(data.try_as_f32()?), + Schema::Double => AvroValue::Double(data.try_as_f64()?), + Schema::Bytes => AvroValue::Bytes(data.try_as_bytes()?.to_vec()), + Schema::String => AvroValue::String(data.try_as_str()?.to_string()), + Schema::Array(s) => { + let data = data.try_as_array()?; + let mut res = Vec::with_capacity(data.len()); + for d in data { + res.push(value_to_avro(d, s, resolver).await?); } - Schema::Record(r) => { - let mut res: Vec<(String, AvroValue)> = Vec::with_capacity(r.fields.len()); - for f in 
&r.fields { - let d = data.get(f.name.as_str()); - - if d.is_none() && f.default.is_some() { - // from_value(f.default.clone().ok_or("unreachable")?)?; - let val = - Value::<'static>::deserialize(f.default.clone().ok_or("unreachable")?) - .map_err(|e| format!("Failed to deserialize default value: {e}"))?; - res.push((f.name.clone(), self.to_avro_value(&val, &f.schema)?)); - continue; - } else if d.is_none() && f.is_nullable() { - res.push((f.name.clone(), AvroValue::Null)); - } else if let Some(d) = d { - res.push((f.name.clone(), self.to_avro_value(d, &f.schema)?)); - } else { - return Err(format!("Missing field {}", f.name).into()); - } - } - AvroValue::Record(res) + AvroValue::Array(res) + } + Schema::Map(s) => { + let obj = data.try_as_object()?; + let mut res = HashMap::with_capacity(obj.len()); + for (k, v) in obj { + res.insert(k.to_string(), value_to_avro(v, s, resolver).await?); } - Schema::Enum(e) => { - let this = data.try_as_str()?; - for (i, variant) in e.symbols.iter().enumerate() { - if variant == this { - return Ok(AvroValue::Enum(u32::try_from(i)?, variant.clone())); - } + AvroValue::Map(res) + } + Schema::Union(s) => { + for (i, variant) in s.variants().iter().enumerate() { + if let Ok(v) = value_to_avro(data, variant, resolver).await { + return Ok(AvroValue::Union(u32::try_from(i)?, Box::new(v))); } - return Err(format!("No variant matched for {this}").into()); } - Schema::Fixed(f) => { - // TODO: possibly allow other types here - let b = data.try_as_bytes()?; - if b.len() != f.size { - return Err(format!( - "Invalid size for fixed type, expected {} got {}", - f.size, - b.len() - ) - .into()); + return Err(format!("No variant matched for {}", data.value_type()).into()); + } + Schema::Record(r) => { + let mut res: Vec<(String, AvroValue)> = Vec::with_capacity(r.fields.len()); + for f in &r.fields { + let d = data.get(f.name.as_str()); + + if d.is_none() && f.default.is_some() { + // from_value(f.default.clone().ok_or("unreachable")?)?; + let val = 
+ Value::<'static>::deserialize(f.default.clone().ok_or("unreachable")?) + .map_err(|e| format!("Failed to deserialize default value: {e}"))?; + res.push(( + f.name.clone(), + value_to_avro(&val, &f.schema, resolver).await?, + )); + continue; + } else if d.is_none() && f.is_nullable() { + res.push((f.name.clone(), AvroValue::Null)); + } else if let Some(d) = d { + res.push((f.name.clone(), value_to_avro(d, &f.schema, resolver).await?)); + } else { + return Err(format!("Missing field {}", f.name).into()); } - AvroValue::Fixed(b.len(), b.to_vec()) - } - Schema::Decimal(_s) => { - // TODO: possibly allow other types here - let d = data.try_as_bytes()?; - let d = Decimal::try_from(d).map_err(|e| format!("Invalid decimal: {e}"))?; - AvroValue::Decimal(d) } - Schema::Uuid => AvroValue::Uuid(data.try_as_str()?.parse()?), // TODO: allow bytes and eventually 128 bit numbers - Schema::Date => AvroValue::Date(data.try_as_i32()?), // TODO: allow strings and other date types? - Schema::TimeMillis => AvroValue::TimeMillis(data.try_as_i32()?), - Schema::TimeMicros => AvroValue::TimeMicros(data.try_as_i64()?), - Schema::TimestampMillis => AvroValue::TimestampMillis(data.try_as_i64()?), - Schema::TimestampMicros => AvroValue::TimestampMicros(data.try_as_i64()?), - Schema::LocalTimestampMillis => AvroValue::LocalTimestampMillis(data.try_as_i64()?), - Schema::LocalTimestampMicros => AvroValue::LocalTimestampMicros(data.try_as_i64()?), - Schema::Duration => { - let v: [u8; 12] = data - .as_bytes() - .and_then(|v| v.try_into().ok()) - .ok_or("Invalid duration")?; - - AvroValue::Duration(Duration::from(v)) - } - Schema::Ref { name } => { - let schema = self.registry.get_schema_by_name(name).ok_or_else(|| { - format!("Schema refferences are not supported, asking for {name}") - })?; - self.to_avro_value(data, schema)? 
- } - }) - } - fn convert_avro_value(val: AvroValue) -> Result> { - Ok(match val { - AvroValue::Null => Value::const_null(), - AvroValue::Boolean(v) => Value::from(v), - AvroValue::Int(v) | AvroValue::TimeMillis(v) | AvroValue::Date(v) => Value::from(v), - AvroValue::Long(v) - | AvroValue::TimestampMicros(v) - | AvroValue::TimestampMillis(v) - | AvroValue::LocalTimestampMillis(v) - | AvroValue::LocalTimestampMicros(v) - | AvroValue::TimeMicros(v) => Value::from(v), - AvroValue::Float(v) => Value::from(v), - AvroValue::Double(v) => Value::from(v), - AvroValue::Bytes(v) | AvroValue::Fixed(_, v) => Value::Bytes(v.into()), - AvroValue::String(v) | AvroValue::Enum(_, v) => Value::from(v), - AvroValue::Union(_, v) => Self::convert_avro_value(*v)?, - AvroValue::Array(v) => Value::Array( - v.into_iter() - .map(Self::convert_avro_value) - .collect::>()?, - ), - AvroValue::Map(v) => Value::from( - v.into_iter() - .map(|(k, v)| Ok((k.into(), Self::convert_avro_value(v)?))) - .collect::>()?, - ), - AvroValue::Record(r) => Value::from( - r.into_iter() - .map(|(k, v)| Ok((k.into(), Self::convert_avro_value(v)?))) - .collect::>()?, - ), - AvroValue::Decimal(v) => { - let d = >::try_from(&v)?; - Value::Bytes(d.into()) + AvroValue::Record(res) + } + Schema::Enum(e) => { + let this = data.try_as_str()?; + for (i, variant) in e.symbols.iter().enumerate() { + if variant == this { + return Ok(AvroValue::Enum(u32::try_from(i)?, variant.clone())); + } } - AvroValue::Duration(v) => { - let d: [u8; 12] = v.into(); - Value::Bytes(d.to_vec().into()) + return Err(format!("No variant matched for {this}").into()); + } + Schema::Fixed(f) => { + // TODO: possibly allow other types here + let b = data.try_as_bytes()?; + if b.len() != f.size { + return Err(format!( + "Invalid size for fixed type, expected {} got {}", + f.size, + b.len() + ) + .into()); } - AvroValue::Uuid(v) => Value::from(v.to_string()), - }) - } - fn write_value<'a>(&self, data: &'a Value, writer: &mut Writer<'a, Vec>) -> 
Result<()> { - let v = self.to_avro_value(data, writer.schema())?; - writer.append(v)?; + AvroValue::Fixed(b.len(), b.to_vec()) + } + Schema::Decimal(_s) => { + // TODO: possibly allow other types here + let d = data.try_as_bytes()?; + let d = Decimal::try_from(d).map_err(|e| format!("Invalid decimal: {e}"))?; + AvroValue::Decimal(d) + } + Schema::Uuid => AvroValue::Uuid(data.try_as_str()?.parse()?), // TODO: allow bytes and eventually 128 bit numbers + Schema::Date => AvroValue::Date(data.try_as_i32()?), // TODO: allow strings and other date types? + Schema::TimeMillis => AvroValue::TimeMillis(data.try_as_i32()?), + Schema::TimeMicros => AvroValue::TimeMicros(data.try_as_i64()?), + Schema::TimestampMillis => AvroValue::TimestampMillis(data.try_as_i64()?), + Schema::TimestampMicros => AvroValue::TimestampMicros(data.try_as_i64()?), + Schema::LocalTimestampMillis => AvroValue::LocalTimestampMillis(data.try_as_i64()?), + Schema::LocalTimestampMicros => AvroValue::LocalTimestampMicros(data.try_as_i64()?), + Schema::Duration => { + let v: [u8; 12] = data + .as_bytes() + .and_then(|v| v.try_into().ok()) + .ok_or("Invalid duration")?; + + AvroValue::Duration(Duration::from(v)) + } + Schema::Ref { name } => { + let schema = resolver.by_name(name).await.ok_or_else(|| { + format!("Schema refferences are not supported, asking for {name}") + })?; + value_to_avro(data, schema.schema(), resolver).await? 
+ } + }) +} - Ok(()) - } +pub(crate) fn avro_to_value(val: AvroValue) -> Result> { + Ok(match val { + AvroValue::Null => Value::const_null(), + AvroValue::Boolean(v) => Value::from(v), + AvroValue::Int(v) | AvroValue::TimeMillis(v) | AvroValue::Date(v) => Value::from(v), + AvroValue::Long(v) + | AvroValue::TimestampMicros(v) + | AvroValue::TimestampMillis(v) + | AvroValue::LocalTimestampMillis(v) + | AvroValue::LocalTimestampMicros(v) + | AvroValue::TimeMicros(v) => Value::from(v), + AvroValue::Float(v) => Value::from(v), + AvroValue::Double(v) => Value::from(v), + AvroValue::Bytes(v) | AvroValue::Fixed(_, v) => Value::Bytes(v.into()), + AvroValue::String(v) | AvroValue::Enum(_, v) => Value::from(v), + AvroValue::Union(_, v) => avro_to_value(*v)?, + AvroValue::Array(v) => { + Value::Array(v.into_iter().map(avro_to_value).collect::>()?) + } + AvroValue::Map(v) => Value::from( + v.into_iter() + .map(|(k, v)| Ok((k.into(), avro_to_value(v)?))) + .collect::>()?, + ), + AvroValue::Record(r) => Value::from( + r.into_iter() + .map(|(k, v)| Ok((k.into(), avro_to_value(v)?))) + .collect::>()?, + ), + AvroValue::Decimal(v) => { + let d = >::try_from(&v)?; + Value::Bytes(d.into()) + } + AvroValue::Duration(v) => { + let d: [u8; 12] = v.into(); + Value::Bytes(d.to_vec().into()) + } + AvroValue::Uuid(v) => Value::from(v.to_string()), + }) } #[async_trait::async_trait] @@ -312,9 +356,18 @@ impl Codec for Avro { _ingest_ns: u64, meta: Value<'input>, ) -> Result, Value<'input>)>> { - let reader = Reader::new(&*data)?; + let schema = if let Some(schema_id) = meta.get_u32("schema_id") { + self.registry.maybe_fetch_id(schema_id).await?; + self.registry + .get_schema_by_id(schema_id) + .ok_or_else(|| format!("No schema found for id {schema_id} in registry"))? 
+ } else { + &self.schema + }; + + let reader = Reader::with_schema(schema, &*data)?; - let mut vals = reader.map(|v| Self::convert_avro_value(v?)); + let mut vals = reader.map(|v| avro_to_value(v?)); vals.next().map(|v| v.map(|v| (v, meta))).transpose() } @@ -332,7 +385,7 @@ impl Codec for Avro { Vec::with_capacity(AVRO_BUFFER_CAP), self.compression, ); - self.write_value(data, &mut writer)?; + self.write_value(data, &mut writer).await?; writer.into_inner().map_err(Error::from) } @@ -662,6 +715,35 @@ mod test { assert_eq!(decoded.0, expected); Ok(()) } + + #[tokio::test(flavor = "multi_thread")] + async fn decode_smaple() -> Result<()> { + // [b'O', b'b', b'j', 1u8] + let from_kafka = vec![0_u8, 0, 0, 0, 1, 12, 115, 116, 114, 105, 110, 103]; + // let from_kafka = vec![b'O', b'b', b'j', 1_u8, 12, 115, 116, 114, 105, 110, 103]; + // let mut from_kafka = vec![12, 115, 116, 114, 105, 110, 103_u8]; + + let mut codec = test_codec(literal!( + { + "type": "record", + "name": "record", + "fields": [ + {"name": "one", "type": "string"}, + ] + } + ))?; + + let decoded = literal!({"one": "string"}); + + let mut encoded = codec.encode(&decoded, &Value::const_null()).await?; + assert_eq!(encoded, from_kafka); + + codec + .decode(&mut encoded, 0, Value::object()) + .await? + .expect("no data"); + Ok(()) + } #[tokio::test(flavor = "multi_thread")] async fn round_robin() -> Result<()> { let mut codec = test_codec(literal!( diff --git a/tremor-codec/src/codec/kafka_schema_registry.rs b/tremor-codec/src/codec/kafka_schema_registry.rs new file mode 100644 index 0000000000..4de5aa7633 --- /dev/null +++ b/tremor-codec/src/codec/kafka_schema_registry.rs @@ -0,0 +1,150 @@ +// Copyright 2020-2021, The Tremor Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! The `avro` codec supports Apache Avro binary encoding. +//! +//! The codec is configured with a codec following the avro json codec specification +//! +//! ## Mappings +//! +//! The same as the [`avro` codec](../avro) + +use crate::{ + avro::{avro_to_value, value_to_avro, SchemaResover, SchemaWrapper}, + prelude::*, +}; +use apache_avro::schema::Name; +use schema_registry_converter::async_impl::schema_registry::SrSettings; +use schema_registry_converter::{ + async_impl::easy_avro::{EasyAvroDecoder as AvroDecoder, EasyAvroEncoder as AvroEncoder}, + schema_registry_common::SubjectNameStrategy, +}; +use tremor_common::url::{HttpDefaults, Url}; + +pub struct Ksr { + registry: Url, + settings: SrSettings, + decoder: AvroDecoder, + encoder: AvroEncoder, +} + +#[allow(clippy::missing_fields_in_debug)] +impl std::fmt::Debug for Ksr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("KSR") + .field("registry", &self.registry) + .field("settings", &self.settings) + .finish() + } +} + +impl Clone for Ksr { + fn clone(&self) -> Self { + Self { + registry: self.registry.clone(), + settings: self.settings.clone(), + decoder: AvroDecoder::new(self.settings.clone()), + encoder: AvroEncoder::new(self.settings.clone()), + } + } +} + +impl Ksr { + pub(crate) fn from_config(config: Option<&Value>) -> Result> { + let url = config + .get_str("url") + .map(ToString::to_string) + .ok_or("Missing URL config for schema registry codec")?; + + let registry = Url::parse(&url)?; + let settings = SrSettings::new(url.to_string()); + 
let decoder = AvroDecoder::new(settings.clone()); + let encoder = AvroEncoder::new(settings.clone()); + Ok(Box::new(Ksr { + registry, + settings, + decoder, + encoder, + })) + } +} + +struct RecordResolver<'a> { + encoder: &'a AvroEncoder, +} + +#[async_trait::async_trait] +impl SchemaResover for RecordResolver<'_> { + async fn by_name(&self, name: &Name) -> Option { + self.encoder + .get_schema_and_id( + &name.name, + SubjectNameStrategy::RecordNameStrategy(name.name.clone()), + ) + .await + .ok() + .map(SchemaWrapper::Schema) + } +} + +#[async_trait::async_trait()] +impl Codec for Ksr { + fn name(&self) -> &str { + todo!() + } + + async fn decode<'input>( + &mut self, + data: &'input mut [u8], + _ingest_ns: u64, + meta: Value<'input>, + ) -> Result, Value<'input>)>> { + let r = self.decoder.decode(Some(data)).await?; + let v = avro_to_value(r.value)?; + Ok(Some((v, meta))) + } + + #[must_use] + async fn encode(&mut self, data: &Value, meta: &Value) -> Result> { + let key = meta.try_get_str("key")?.ok_or("Missing key")?; + let topic = meta + .try_get_str("topic")? + .ok_or("Missing topic")? + .to_string(); + let strategy = SubjectNameStrategy::TopicNameStrategy(topic, true); + + let schema = self + .encoder + .get_schema_and_id(key, strategy.clone()) + .await?; + + // self.encoder.encode(values, subject_name_strategy) + let avro_value = value_to_avro( + data, + &schema.parsed, + &RecordResolver { + encoder: &self.encoder, + }, + ) + .await?; + Ok(self + .encoder + .encode(vec![(key, avro_value)], strategy) + .await?) + } + + fn boxed_clone(&self) -> Box { + Box::new(self.clone()) + } +} diff --git a/tremor-codec/src/errors.rs b/tremor-codec/src/errors.rs index 40f2bb48d7..27999c8441 100644 --- a/tremor-codec/src/errors.rs +++ b/tremor-codec/src/errors.rs @@ -56,6 +56,8 @@ error_chain! 
{ SimdUtf8(simdutf8::basic::Utf8Error); TremorCodec(crate::codec::tremor::Error); AvroError(apache_avro::Error); + UrlParseError(tremor_common::url::ParseError); + SRCError(schema_registry_converter::error::SRCError); } errors { diff --git a/tremor-codec/src/lib.rs b/tremor-codec/src/lib.rs index 969fa284ab..fced2d3e0e 100644 --- a/tremor-codec/src/lib.rs +++ b/tremor-codec/src/lib.rs @@ -39,6 +39,7 @@ mod codec { pub(crate) mod influx; /// JSON codec pub mod json; + pub(crate) mod kafka_schema_registry; pub(crate) mod msgpack; pub(crate) mod null; pub(crate) mod statsd; @@ -120,6 +121,7 @@ impl Debug for dyn Codec { pub fn resolve(config: &config::Codec) -> Result> { match config.name.as_str() { "avro" => avro::Avro::from_config(config.config.as_ref()), + "kafka-schema-registry" => kafka_schema_registry::Ksr::from_config(config.config.as_ref()), "binary" => Ok(Box::new(binary::Binary {})), "binflux" => Ok(Box::::default()), "csv" => Ok(Box::new(csv::Csv {})), diff --git a/tremor-common/Cargo.toml b/tremor-common/Cargo.toml index c288d4ff96..9184f005dc 100644 --- a/tremor-common/Cargo.toml +++ b/tremor-common/Cargo.toml @@ -17,6 +17,9 @@ url = "2" simd-json = { version = "0.11", features = ["known-key"] } simd-json-derive = "0.11" base64 = "0.21" +regex = "*" +lazy_static = "*" [dev-dependencies] tokio = { version = "1", features = ["full"] } +test-case = "3" diff --git a/tremor-common/src/lib.rs b/tremor-common/src/lib.rs index d8568dece1..3a61e44163 100644 --- a/tremor-common/src/lib.rs +++ b/tremor-common/src/lib.rs @@ -48,6 +48,9 @@ pub mod ports; /// Base64 engine pub mod base64; +/// URL with defaults +pub mod url; + pub use errors::Error; #[cfg(test)] diff --git a/src/connectors/utils/url.rs b/tremor-common/src/url.rs similarity index 84% rename from src/connectors/utils/url.rs rename to tremor-common/src/url.rs index 2869e2b108..ec1d3c67e4 100644 --- a/src/connectors/utils/url.rs +++ b/tremor-common/src/url.rs @@ -12,17 +12,19 @@ // See the License for the 
specific language governing permissions and // limitations under the License. -use crate::errors::Result; +use lazy_static::lazy_static; use regex::Regex; use serde::{Deserialize, Serialize}; use std::marker::PhantomData; +pub use url::ParseError; lazy_static! { // ALLOW: we know this regex is valid static ref URL_SCHEME_REGEX: Regex = Regex::new("^[A-Za-z-]+://").expect("Invalid Regex"); } -pub(crate) trait Defaults { +/// Default values for a URL +pub trait Defaults { /// Default scheme const SCHEME: &'static str; /// Default host @@ -32,13 +34,14 @@ pub(crate) trait Defaults { } /// Default HTTP -pub(crate) struct HttpDefaults; +pub struct HttpDefaults; impl Defaults for HttpDefaults { const HOST: &'static str = "localhost"; const SCHEME: &'static str = "http"; const PORT: u16 = 80; } -pub(crate) struct HttpsDefaults; +/// Default HTTPS +pub struct HttpsDefaults; impl Defaults for HttpsDefaults { const SCHEME: &'static str = "http"; const HOST: &'static str = "localhost"; @@ -47,7 +50,7 @@ impl Defaults for HttpsDefaults { /// Endpoint URL #[derive(Serialize)] -pub(crate) struct Url { +pub struct Url { url: url::Url, #[serde(skip)] _marker: PhantomData, @@ -132,25 +135,30 @@ impl Default for Url { } impl Url { - pub(crate) fn parse(input: &str) -> Result { - let parsed = if URL_SCHEME_REGEX.is_match(input) { - url::Url::parse(input) + /// Parse a URL + /// # Errors + /// if the URL is invalid + pub fn parse(input: &str) -> Result { + let url = if URL_SCHEME_REGEX.is_match(input) { + url::Url::parse(input)? } else { - url::Url::parse(&format!("{}://{input}", D::SCHEME)) + url::Url::parse(&format!("{}://{input}", D::SCHEME))? 
}; - match parsed { - Ok(url) => Ok(Self { - url, - ..Self::default() - }), - Err(e) => Err(e.into()), - } + Ok(Self { + url, + ..Self::default() + }) } - pub(crate) fn port_or_dflt(&self) -> u16 { + /// fetches the port, if provided, or the default if not + #[must_use] + pub fn port_or_dflt(&self) -> u16 { self.url.port().unwrap_or(D::PORT) } - pub(crate) fn host_or_local(&self) -> &str { + + /// fetches the host, if provided, or the default if not + #[must_use] + pub fn host_or_local(&self) -> &str { if let Some(host) = self.url.host_str() { // the url lib is shit in that it prints ipv6 addresses with the brackets (e.g. [::1]) // but e.g. the socket handling libs want those addresses without, so we strip them here @@ -165,7 +173,9 @@ impl Url { } } - pub(crate) fn url(&self) -> &url::Url { + /// fetches the underlying raw URL + #[must_use] + pub fn url(&self) -> &url::Url { &self.url } } @@ -184,7 +194,10 @@ mod test { #[test_case("127.0.0.1", "http://127.0.0.1/"; "ensure scheme without port")] #[test_case("localhost:42", "http://localhost:42/"; "ensure scheme")] #[test_case("scheme://host:42/path?query=1&query=2#fragment", "scheme://host:42/path?query=1&query=2#fragment"; "all the url features")] - fn serialize_deserialize(input: &str, expected: &str) -> Result<()> { + fn serialize_deserialize( + input: &str, + expected: &str, + ) -> Result<(), Box> { let mut input = format!("\"{input}\""); // prepare for json compat let url: Url = unsafe { simd_json::from_str(&mut input)? }; From d41a1e37de62d14740010f7c758bc05567bb5c71 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Thu, 12 Oct 2023 15:24:59 +0200 Subject: [PATCH 3/9] Cleanup Signed-off-by: Heinz N. 
Gies --- Cargo.lock | 384 +++--------------- src/connectors/google.rs | 4 +- src/preprocessor.rs | 4 - .../kafka_schema_registry_prefix.rs | 92 ----- tremor-codec/Cargo.toml | 13 +- 5 files changed, 70 insertions(+), 427 deletions(-) delete mode 100644 src/preprocessor/kafka_schema_registry_prefix.rs diff --git a/Cargo.lock b/Cargo.lock index de3a25e091..b0da1913e8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,17 +2,6 @@ # It is not intended for manual editing. version = 3 -[[package]] -name = "addr" -version = "0.11.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936697e9caf938eb2905036100edf8e1269da8291f8a02f5fe7b37073784eec0" -dependencies = [ - "no-std-net", - "psl", - "psl-types", -] - [[package]] name = "addr2line" version = "0.21.0" @@ -113,9 +102,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.1.1" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea5d730647d4fadd988536d06fecce94b7b4f2a7efdae548f1cf4b63205518ab" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" dependencies = [ "memchr", ] @@ -224,7 +213,7 @@ dependencies = [ "strum_macros", "thiserror", "typed-builder", - "uuid 1.4.1", + "uuid", "xz2", "zstd 0.12.4", ] @@ -316,14 +305,15 @@ dependencies = [ [[package]] name = "async-h1" -version = "2.3.3" +version = "2.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8101020758a4fc3a7c326cb42aa99e9fa77cbfb76987c128ad956406fe1f70a7" +checksum = "5d1d1dae8cb2c4258a79d6ed088b7fb9b4763bf4e9b22d040779761e046a2971" dependencies = [ "async-channel", "async-dup", - "async-std", - "futures-core", + "async-global-executor", + "async-io", + "futures-lite", "http-types", "httparse", "log", @@ -372,7 +362,7 @@ dependencies = [ "cfg-if", "event-listener 3.0.0", "futures-lite", - "rustix 0.38.17", + "rustix 0.38.18", "windows-sys 0.48.0", ] @@ -389,9 +379,9 @@ dependencies = [ [[package]] name = 
"async-signal" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1079d27511f6c038736279421774ef4ad4bdd2e300825f4a48c4cc463a57cedf" +checksum = "d2a5415b7abcdc9cd7d63d6badba5288b2ca017e3fbd4173b8f405449f1a2399" dependencies = [ "async-io", "async-lock", @@ -399,7 +389,7 @@ dependencies = [ "cfg-if", "futures-core", "futures-io", - "rustix 0.38.17", + "rustix 0.38.18", "signal-hook-registry", "slab", "windows-sys 0.48.0", @@ -649,7 +639,7 @@ dependencies = [ "http", "percent-encoding", "tracing", - "uuid 1.4.1", + "uuid", ] [[package]] @@ -1108,9 +1098,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.6.2" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c2f7349907b712260e64b0afe2f84692af14a454be26187d9df565c7f69266a" +checksum = "c79ad7fb2dd38f3dabd76b09c6a5a20c038fc0213ef1e9afd30eb777f120f019" dependencies = [ "memchr", "serde", @@ -1232,7 +1222,7 @@ checksum = "f1369bc6b9e9a7dfdae2055f6ec151fe9c554a9d23d357c0237cee2e25eaabb7" dependencies = [ "chrono", "chrono-tz-build", - "phf 0.11.2", + "phf", ] [[package]] @@ -1242,8 +1232,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2f5ebdc942f57ed96d560a6d1a459bae5851102a25d5bf89dc04ae453e31ecf" dependencies = [ "parse-zoneinfo", - "phf 0.11.2", - "phf_codegen 0.11.2", + "phf", + "phf_codegen", ] [[package]] @@ -1378,7 +1368,7 @@ dependencies = [ "thiserror", "tokio", "url", - "uuid 1.4.1", + "uuid", ] [[package]] @@ -1938,12 +1928,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d9d8664cf849d7d0f3114a3a387d2f5e4303176d746d5a951aaddc66dfe9240" -[[package]] -name = "doc-comment" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" - [[package]] name = "downcast-rs" version = "1.2.0" @@ -2881,12 +2865,6 @@ 
dependencies = [ "cfg-if", ] -[[package]] -name = "integer-encoding" -version = "3.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" - [[package]] name = "io-lifetimes" version = "1.0.11" @@ -2923,7 +2901,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ "hermit-abi 0.3.3", - "rustix 0.38.17", + "rustix 0.38.18", "windows-sys 0.48.0", ] @@ -2959,9 +2937,9 @@ checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" [[package]] name = "jobserver" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2" +checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" dependencies = [ "libc", ] @@ -2975,25 +2953,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "json-pointer" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fe841b94e719a482213cee19dd04927cf412f26d8dc84c5a446c081e49c2997" -dependencies = [ - "serde_json", -] - -[[package]] -name = "jsonway" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "effcb749443c905fbaef49d214f8b1049c240e0adb7af9baa0e201e625e4f9de" -dependencies = [ - "serde", - "serde_json", -] - [[package]] name = "jsonwebtoken" version = "7.2.0" @@ -3269,29 +3228,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "logos" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf8b031682c67a8e3d5446840f9573eb7fe26efe7ec8d195c9ac4c0647c502f1" -dependencies = [ - "logos-derive", -] - -[[package]] -name = "logos-derive" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a1d849148dbaf9661a6151d1ca82b13bb4c4c128146a88d05253b38d4e2f496c" -dependencies = [ - "beef", - "fnv", - "proc-macro2", - "quote", - "regex-syntax 0.6.29", - "syn 1.0.109", -] - [[package]] name = "lru" version = "0.11.1" @@ -3473,15 +3409,6 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" -[[package]] -name = "no-std-net" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bcece43b12349917e096cddfa66107277f123e6c96a5aea78711dc601a47152" -dependencies = [ - "serde", -] - [[package]] name = "nom" version = "5.1.3" @@ -3686,18 +3613,18 @@ dependencies = [ [[package]] name = "ordered-float" -version = "2.10.0" +version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7940cf2ca942593318d07fcf2596cdca60a85c9e7fab408a5e21a4f9dcd40d87" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" dependencies = [ "num-traits", ] [[package]] name = "ordered-float" -version = "3.9.1" +version = "3.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a54938017eacd63036332b4ae5c8a49fc8c0c1d6d629893057e4f13609edd06" +checksum = "f1e1c390732d15f1d48471625cd92d154e66db2c56645e29a9cd26f4699f72dc" dependencies = [ "num-traits", ] @@ -3817,51 +3744,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" -[[package]] -name = "pest" -version = "2.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c022f1e7b65d6a24c0dbbd5fb344c66881bc01f3e5ae74a1c8100f2f985d98a4" -dependencies = [ - "memchr", - "thiserror", - "ucd-trie", -] - -[[package]] -name = "pest_derive" -version = "2.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"35513f630d46400a977c4cb58f78e1bfbe01434316e60c37d27b9ad6139c66d8" -dependencies = [ - "pest", - "pest_generator", -] - -[[package]] -name = "pest_generator" -version = "2.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc9fc1b9e7057baba189b5c626e2d6f40681ae5b6eb064dc7c7834101ec8123a" -dependencies = [ - "pest", - "pest_meta", - "proc-macro2", - "quote", - "syn 2.0.38", -] - -[[package]] -name = "pest_meta" -version = "2.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1df74e9e7ec4053ceb980e7c0c8bd3594e977fde1af91daba9c928e8e8c6708d" -dependencies = [ - "once_cell", - "pest", - "sha2 0.10.8", -] - [[package]] name = "petgraph" version = "0.6.4" @@ -3872,15 +3754,6 @@ dependencies = [ "indexmap 2.0.2", ] -[[package]] -name = "phf" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" -dependencies = [ - "phf_shared 0.8.0", -] - [[package]] name = "phf" version = "0.11.2" @@ -3890,36 +3763,16 @@ dependencies = [ "phf_shared 0.11.2", ] -[[package]] -name = "phf_codegen" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" -dependencies = [ - "phf_generator 0.8.0", - "phf_shared 0.8.0", -] - [[package]] name = "phf_codegen" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" dependencies = [ - "phf_generator 0.11.2", + "phf_generator", "phf_shared 0.11.2", ] -[[package]] -name = "phf_generator" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" -dependencies = [ - "phf_shared 0.8.0", - "rand 0.7.3", -] - [[package]] name = "phf_generator" version = "0.11.2" @@ 
-3930,15 +3783,6 @@ dependencies = [ "rand 0.8.5", ] -[[package]] -name = "phf_shared" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" -dependencies = [ - "siphasher", -] - [[package]] name = "phf_shared" version = "0.10.0" @@ -4229,33 +4073,6 @@ version = "2.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" -[[package]] -name = "protofish" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2a5fc771504e21bfc00513bfdb1f8d2c183bdb58a50c8ec31db946daa5a3257" -dependencies = [ - "bytes", - "pest", - "pest_derive", - "snafu", -] - -[[package]] -name = "psl" -version = "2.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a1be0afcd844b15cfce18bf8cccf2dfa887a00a6454a9ea135f122b948cee91" -dependencies = [ - "psl-types", -] - -[[package]] -name = "psl-types" -version = "2.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33cb294fe86a74cbcf50d4445b37da762029549ebeea341421c7c70370f86cac" - [[package]] name = "quad-rand" version = "0.2.1" @@ -4297,7 +4114,6 @@ dependencies = [ "rand_chacha 0.2.2", "rand_core 0.5.1", "rand_hc", - "rand_pcg", ] [[package]] @@ -4358,15 +4174,6 @@ dependencies = [ "rand_core 0.5.1", ] -[[package]] -name = "rand_pcg" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" -dependencies = [ - "rand_core 0.5.1", -] - [[package]] name = "rand_xorshift" version = "0.3.0" @@ -4464,25 +4271,25 @@ dependencies = [ [[package]] name = "regex" -version = "1.9.6" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebee201405406dbf528b8b672104ae6d6d63e6d118cb10e4d51abbc7b58044ff" 
+checksum = "d119d7c7ca818f8a53c300863d4f87566aac09943aef5b355bb83969dae75d87" dependencies = [ "aho-corasick", "memchr", "regex-automata", - "regex-syntax 0.7.5", + "regex-syntax 0.8.1", ] [[package]] name = "regex-automata" -version = "0.3.9" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9" +checksum = "465c6fc0621e4abc4187a2bda0937bfd4f722c2730b29562e19689ea796c9a4b" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.7.5", + "regex-syntax 0.8.1", ] [[package]] @@ -4493,15 +4300,15 @@ checksum = "9a6ebcd15653947e6140f59a9811a06ed061d18a5c35dfca2e2e4c5525696878" [[package]] name = "regex-syntax" -version = "0.6.29" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" [[package]] name = "regex-syntax" -version = "0.7.5" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" +checksum = "56d84fdd47036b038fc80dd333d10b6aab10d5d31f4a366e20014def75328d33" [[package]] name = "reqwest" @@ -4578,9 +4385,9 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.2" +version = "0.17.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "911b295d2d302948838c8ac142da1ee09fa7863163b44e6715bc9357905878b8" +checksum = "9babe80d5c16becf6594aa32ad2be8fe08498e7ae60b77de8df700e67f191d7e" dependencies = [ "cc", "getrandom 0.2.10", @@ -4634,14 +4441,14 @@ dependencies = [ "dirs", "half 2.3.1", "lazy_static", - "ordered-float 3.9.1", + "ordered-float 3.9.2", "regex", "rust_tokenizers", "serde", "serde_json", "tch", "thiserror", - "uuid 1.4.1", + "uuid", ] [[package]] @@ -4685,7 +4492,7 @@ version = "0.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" dependencies = [ - "semver 1.0.19", + "semver 1.0.20", ] [[package]] @@ -4704,9 +4511,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.17" +version = "0.38.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f25469e9ae0f3d0047ca8b93fc56843f38e6774f0914a107ff8b41be8be8e0b7" +checksum = "5a74ee2d7c2581cd139b42447d7d9389b889bdaad3a73f1ebb16f2a3237bb19c" dependencies = [ "bitflags 2.4.0", "errno", @@ -4851,21 +4658,16 @@ dependencies = [ [[package]] name = "schema_registry_converter" version = "3.1.0" +source = "git+https://github.com/tremor-rs/schema_registry_converter.git?branch=housekeeping#9e88b5845cd1f0142786815d23844f3c4fa7820a" dependencies = [ "apache-avro", "byteorder", - "bytes", "dashmap", "futures", - "integer-encoding", - "logos", - "protofish", "reqwest", "serde", "serde_json", "tokio", - "url", - "valico", ] [[package]] @@ -4928,9 +4730,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad977052201c6de01a8ef2aa3378c4bd23217a056337d1d6da40468d267a4fb0" +checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" [[package]] name = "semver-parser" @@ -4953,7 +4755,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" dependencies = [ - "ordered-float 2.10.0", + "ordered-float 2.10.1", "serde", ] @@ -5242,9 +5044,9 @@ dependencies = [ [[package]] name = "simd-json-derive" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fe635112b99ba4980d643d40c125eadbbf4f008187d01a2f6043d5b6e352bc9" +checksum = "aaa9752381f22a384f50938d39bb22d34053a6672f0fbadb38716e7ba5821f33" 
dependencies = [ "chrono", "itoa 1.0.9", @@ -5256,9 +5058,9 @@ dependencies = [ [[package]] name = "simd-json-derive-int" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bd11408bd50363eb2ada6dca003f1a27925edcf5715d66106fff9eaf23efa" +checksum = "3a53e29f4fa3048cfe9452895aa77d52b7d9ecd930011e780368cc5985866006" dependencies = [ "proc-macro2", "quote", @@ -5335,27 +5137,6 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" -[[package]] -name = "snafu" -version = "0.6.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eab12d3c261b2308b0d80c26fffb58d17eba81a4be97890101f416b478c79ca7" -dependencies = [ - "doc-comment", - "snafu-derive", -] - -[[package]] -name = "snafu-derive" -version = "0.6.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1508efa03c362e23817f96cde18abed596a25219a8b2c66e8db33c03543d315b" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "snap" version = "1.1.0" @@ -5717,7 +5498,7 @@ dependencies = [ "cfg-if", "fastrand 2.0.1", "redox_syscall 0.3.5", - "rustix 0.38.17", + "rustix 0.38.18", "windows-sys 0.48.0", ] @@ -6335,7 +6116,7 @@ dependencies = [ "tremor-common", "tremor-influx", "tremor-value", - "uuid 1.4.1", + "uuid", "value-trait", ] @@ -6525,7 +6306,7 @@ dependencies = [ "tremor-value", "trust-dns-resolver", "url", - "uuid 1.4.1", + "uuid", "value-trait", "xz2", "zstd 0.12.4", @@ -6735,12 +6516,6 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" -[[package]] -name = "ucd-trie" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" - 
[[package]] name = "unarray" version = "0.1.4" @@ -6841,15 +6616,6 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" -[[package]] -name = "uritemplate-next" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcde98d1fc3f528255b1ecb22fb688ee0d23deb672a8c57127df10b98b4bd18c" -dependencies = [ - "regex", -] - [[package]] name = "url" version = "2.4.1" @@ -6880,15 +6646,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" -[[package]] -name = "uuid" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" -dependencies = [ - "getrandom 0.2.10", -] - [[package]] name = "uuid" version = "1.4.1" @@ -6899,28 +6656,6 @@ dependencies = [ "serde", ] -[[package]] -name = "valico" -version = "3.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "647856408e327686b6640397d19f60fac3e64c3bfaa6afc409da63ef7da45edb" -dependencies = [ - "addr", - "base64 0.13.1", - "chrono", - "json-pointer", - "jsonway", - "percent-encoding", - "phf 0.8.0", - "phf_codegen 0.8.0", - "regex", - "serde", - "serde_json", - "uritemplate-next", - "url", - "uuid 0.8.2", -] - [[package]] name = "value-bag" version = "1.4.1" @@ -7164,7 +6899,7 @@ version = "0.22.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed63aea5ce73d0ff405984102c42de94fc55a6b75765d621c65262469b3c9b53" dependencies = [ - "ring 0.17.2", + "ring 0.17.3", "untrusted 0.9.0", ] @@ -7210,7 +6945,7 @@ dependencies = [ "either", "home", "once_cell", - "rustix 0.38.17", + "rustix 0.38.18", ] [[package]] @@ -7529,11 +7264,10 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.8+zstd.1.5.5" 
+version = "2.0.9+zstd.1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c" +checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" dependencies = [ "cc", - "libc", "pkg-config", ] diff --git a/src/connectors/google.rs b/src/connectors/google.rs index bd5bd11ea2..917cb4095f 100644 --- a/src/connectors/google.rs +++ b/src/connectors/google.rs @@ -34,8 +34,8 @@ pub(crate) trait ChannelFactory< } /// Token Source -#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(rename_all(serialize = "lowercase", deserialize = "lowercase"))] +#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] +#[serde(rename_all = "lowercase")] pub enum TokenSrc { /// Enmbedded JSON Json(OwnedValue), diff --git a/src/preprocessor.rs b/src/preprocessor.rs index 1dfbdbebfd..ce9fb230a9 100644 --- a/src/preprocessor.rs +++ b/src/preprocessor.rs @@ -16,7 +16,6 @@ mod base64; mod decompress; pub(crate) mod gelf_chunking; mod ingest_ns; -mod kafka_schema_registry_prefix; mod length_prefixed; mod remove_empty; pub(crate) mod separate; @@ -83,9 +82,6 @@ pub fn lookup_with_config(config: &PreprocessorConfig) -> Result Ok(Box::::default()), "ingest-ns" => Ok(Box::::default()), "length-prefixed" => Ok(Box::::default()), - "schema-registry" => { - Ok(Box::::default()) - } "textual-length-prefixed" => { Ok(Box::::default()) } diff --git a/src/preprocessor/kafka_schema_registry_prefix.rs b/src/preprocessor/kafka_schema_registry_prefix.rs deleted file mode 100644 index 5ef766a921..0000000000 --- a/src/preprocessor/kafka_schema_registry_prefix.rs +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright 2020-2021, The Tremor Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! Extracts the ingest timestamp from the first 8 bytes of the message and removes it from the message. - -use super::prelude::*; -use byteorder::{BigEndian, ReadBytesExt}; -use value_trait::Mutable; - -#[derive(Clone, Default, Debug)] -pub(crate) struct SchemaRegistryPrefix {} -impl Preprocessor for SchemaRegistryPrefix { - fn name(&self) -> &str { - "schema-registry" - } - - fn process( - &mut self, - _ingest_ns: &mut u64, - data: &[u8], - mut meta: Value<'static>, - ) -> Result, Value<'static>)>> { - use std::io::Cursor; - if let Some(d) = data.get(5..) { - let mut c = Cursor::new(data); - let magic = c.read_u8()?; - if magic != 0 { - return Err( - format!("Invalid magic bytes (0x00) for kafka wire format: {magic}").into(), - ); - } - let schema = c.read_u32::()?; - meta.insert("schema_id", schema)?; - Ok(vec![(d.to_vec(), meta)]) - } else { - Err("Kafka schema registry Preprocessor: < 5 byte".into()) - } - } -} - -#[cfg(test)] -mod test { - use super::*; - use value_trait::ValueAccess; - - /// Tests if the preprocessor errors on data that's less then 5 bytes - #[test] - fn test_preprocessor_less_then_5_bytes() { - let mut pp = SchemaRegistryPrefix::default(); - let mut ingest_ns = 0; - let data = vec![0, 0, 0, 0]; - let meta = Value::object(); - let res = pp.process(&mut ingest_ns, &data, meta); - assert!(res.is_err()); - } - - /// Tests if `schema_id` is added to the meta data properly - #[test] - fn test_preprocessor_schema_id() -> Result<()> { - let mut pp = SchemaRegistryPrefix::default(); - let mut ingest_ns = 0; - let data = vec![0, 
0, 0, 0, 1, 42]; - let meta = Value::object(); - let mut res = pp.process(&mut ingest_ns, &data, meta)?; - let (rest, meta) = res.pop().expect("no result"); - assert_eq!(meta.get_u8("schema_id"), Some(1)); - assert_eq!(rest, vec![42]); - Ok(()) - } - - /// Tests if the preprocessor errors on invalid magic bytes - #[test] - fn test_preprocessor_invalid_magic_bytes() { - let mut pp = SchemaRegistryPrefix::default(); - let mut ingest_ns = 0; - let data = vec![1, 0, 0, 0, 1]; - let meta = Value::object(); - let res = pp.process(&mut ingest_ns, &data, meta); - assert!(res.is_err()); - } -} diff --git a/tremor-codec/Cargo.toml b/tremor-codec/Cargo.toml index b45232353e..cd90a44631 100644 --- a/tremor-codec/Cargo.toml +++ b/tremor-codec/Cargo.toml @@ -32,11 +32,16 @@ uuid = { version = "1.3", features = ["v4"] } matches = "0.1" async-recursion = "1" -# codecs -schema_registry_converter = { version = "3", features = [ +schema_registry_converter = { version = "3", default-features = false, features = [ + "futures", + "rustls_tls", "avro", "easy", -], path = "../../schema_registry_converter" } +], git = "https://github.com/tremor-rs/schema_registry_converter.git", branch = "housekeeping" } + +# path = "schema_registry_converter" + +# codecs reqwest = { version = "0.11", default-features = false, features = [ "rustls-tls", "rustls-tls-native-roots", @@ -51,7 +56,7 @@ apache-avro = { version = "0.16", features = [ "zstandard", "bzip2", ] } -# git = "https://github.com/gklijs/schema_registry_converter.git", rev = "72d89d5" # we need this for v0.16 of avro versioning +# serde = "1" rmp-serde = "1.1" syslog_loose = "0.19" From 993ec297745aaa82388abb79e1e57f910f55c26f Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Thu, 12 Oct 2023 17:53:06 +0200 Subject: [PATCH 4/9] Extract pre/postprocessors Signed-off-by: Heinz N. 
Gies --- Cargo.lock | 65 +- Cargo.toml | 10 +- depricated/common/mmap.rs | 4 +- depricated/sink/amqp.rs | 2 +- depricated/sink/nats.rs | 2 +- depricated/sink/newrelic.rs | 2 +- depricated/sink/postgres.rs | 2 +- depricated/source/amqp.rs | 2 +- depricated/source/nats.rs | 2 +- depricated/source/postgres.rs | 2 +- depricated/source/sse.rs | 2 +- src/config.rs | 35 +- src/connectors.rs | 73 +- src/connectors/impls/bench.rs | 11 +- src/connectors/impls/cb.rs | 10 +- src/connectors/impls/clickhouse.rs | 4 +- src/connectors/impls/crononome.rs | 4 +- src/connectors/impls/discord.rs | 4 +- src/connectors/impls/dns/client.rs | 2 +- src/connectors/impls/elastic.rs | 11 +- src/connectors/impls/exit.rs | 6 +- src/connectors/impls/file.rs | 4 +- src/connectors/impls/gbq/writer.rs | 15 +- src/connectors/impls/gbq/writer/sink.rs | 36 +- src/connectors/impls/gcl/writer.rs | 7 +- src/connectors/impls/gcl/writer/meta.rs | 12 +- src/connectors/impls/gcl/writer/sink.rs | 8 +- src/connectors/impls/gcs/streamer.rs | 38 +- src/connectors/impls/gpubsub/consumer.rs | 5 +- src/connectors/impls/gpubsub/producer.rs | 16 +- src/connectors/impls/http/auth.rs | 3 +- src/connectors/impls/http/client.rs | 7 +- src/connectors/impls/http/meta.rs | 7 +- src/connectors/impls/http/server.rs | 6 +- src/connectors/impls/kafka.rs | 2 +- src/connectors/impls/kafka/consumer.rs | 4 +- src/connectors/impls/kafka/producer.rs | 4 +- src/connectors/impls/kv.rs | 4 +- src/connectors/impls/metrics.rs | 2 +- src/connectors/impls/metronome.rs | 6 +- src/connectors/impls/null.rs | 2 +- src/connectors/impls/otel/client.rs | 6 +- src/connectors/impls/otel/server.rs | 8 +- src/connectors/impls/s3/reader.rs | 4 +- src/connectors/impls/s3/streamer.rs | 6 +- src/connectors/impls/stdio.rs | 2 +- src/connectors/impls/tcp.rs | 8 +- src/connectors/impls/tcp/client.rs | 4 +- src/connectors/impls/tcp/server.rs | 4 +- src/connectors/impls/udp/client.rs | 4 +- src/connectors/impls/udp/server.rs | 4 +- 
src/connectors/impls/unix_socket/client.rs | 4 +- src/connectors/impls/unix_socket/server.rs | 4 +- src/connectors/impls/wal.rs | 4 +- src/connectors/impls/ws/client.rs | 4 +- src/connectors/impls/ws/server.rs | 4 +- src/connectors/prelude.rs | 17 +- src/connectors/sink.rs | 61 +- src/connectors/source.rs | 59 +- src/connectors/tests.rs | 16 +- src/connectors/utils/metrics.rs | 27 +- src/connectors/utils/mime.rs | 2 +- src/connectors/utils/pb.rs | 3 +- src/connectors/utils/reconnect.rs | 13 +- src/errors.rs | 59 +- src/lib.rs | 4 - src/pipeline.rs | 59 +- src/preprocessor.rs | 26 +- src/system.rs | 2 +- src/system/flow.rs | 72 +- src/system/flow_supervisor.rs | 15 +- tremor-api/src/api/model.rs | 5 +- tremor-cli/Cargo.toml | 1 + tremor-cli/src/errors.rs | 1 + tremor-cli/src/run.rs | 24 +- tremor-codec/Cargo.toml | 5 +- tremor-codec/src/lib.rs | 9 +- tremor-common/src/alias.rs | 114 +++ tremor-common/src/base64.rs | 1 + tremor-common/src/lib.rs | 14 + tremor-config/Cargo.toml | 17 + .../src/config.rs => tremor-config/src/lib.rs | 48 +- tremor-interceptor/Cargo.toml | 36 + tremor-interceptor/src/errors.rs | 66 ++ tremor-interceptor/src/lib.rs | 35 + .../src}/postprocessor.rs | 13 +- .../src}/postprocessor/base64.rs | 5 +- .../src}/postprocessor/chunk.rs | 4 +- .../src}/postprocessor/compress.rs | 0 .../src}/postprocessor/gelf_chunking.rs | 0 .../src}/postprocessor/ingest_ns.rs | 2 +- .../src}/postprocessor/length_prefixed.rs | 2 +- .../src}/postprocessor/separate.rs | 7 +- .../postprocessor/textual_length_prefixed.rs | 2 +- tremor-interceptor/src/preprocessor.rs | 766 ++++++++++++++++++ .../src}/preprocessor/base64.rs | 3 +- .../src}/preprocessor/decompress.rs | 0 .../src}/preprocessor/gelf_chunking.rs | 7 +- .../src}/preprocessor/ingest_ns.rs | 0 .../src}/preprocessor/length_prefixed.rs | 0 .../src}/preprocessor/remove_empty.rs | 2 +- .../src}/preprocessor/separate.rs | 14 +- .../preprocessor/textual_length_prefixed.rs | 2 +- tremor-pipeline/Cargo.toml | 2 +- 
tremor-pipeline/src/executable_graph.rs | 3 +- tremor-pipeline/src/lib.rs | 5 +- tremor-pipeline/src/op.rs | 15 - .../src/op/bert/sequence_classification.rs | 2 +- tremor-pipeline/src/op/bert/summarization.rs | 2 +- tremor-pipeline/src/op/debug/history.rs | 3 +- tremor-pipeline/src/op/generic/batch.rs | 2 +- tremor-pipeline/src/op/prelude.rs | 1 + tremor-pipeline/src/op/qos/backpressure.rs | 2 +- tremor-pipeline/src/op/qos/percentile.rs | 2 +- tremor-pipeline/src/op/qos/roundrobin.rs | 2 +- tremor-script/Cargo.toml | 1 - tremor-script/src/ast/deploy.rs | 8 +- tremor-script/src/extractor.rs | 3 +- tremor-script/src/extractor/base64.rs | 3 +- tremor-script/src/std_lib/base64.rs | 3 +- tremor-value/src/macros.rs | 2 +- tremor-value/src/value/cmp.rs | 3 +- 122 files changed, 1611 insertions(+), 606 deletions(-) create mode 100644 tremor-common/src/alias.rs create mode 100644 tremor-config/Cargo.toml rename tremor-codec/src/config.rs => tremor-config/src/lib.rs (80%) create mode 100644 tremor-interceptor/Cargo.toml create mode 100644 tremor-interceptor/src/errors.rs create mode 100644 tremor-interceptor/src/lib.rs rename {src => tremor-interceptor/src}/postprocessor.rs (96%) rename {src => tremor-interceptor/src}/postprocessor/base64.rs (92%) rename {src => tremor-interceptor/src}/postprocessor/chunk.rs (99%) rename {src => tremor-interceptor/src}/postprocessor/compress.rs (100%) rename {src => tremor-interceptor/src}/postprocessor/gelf_chunking.rs (100%) rename {src => tremor-interceptor/src}/postprocessor/ingest_ns.rs (97%) rename {src => tremor-interceptor/src}/postprocessor/length_prefixed.rs (97%) rename {src => tremor-interceptor/src}/postprocessor/separate.rs (96%) rename {src => tremor-interceptor/src}/postprocessor/textual_length_prefixed.rs (97%) create mode 100644 tremor-interceptor/src/preprocessor.rs rename {src => tremor-interceptor/src}/preprocessor/base64.rs (94%) rename {src => tremor-interceptor/src}/preprocessor/decompress.rs (100%) rename {src => 
tremor-interceptor/src}/preprocessor/gelf_chunking.rs (98%) rename {src => tremor-interceptor/src}/preprocessor/ingest_ns.rs (100%) rename {src => tremor-interceptor/src}/preprocessor/length_prefixed.rs (100%) rename {src => tremor-interceptor/src}/preprocessor/remove_empty.rs (97%) rename {src => tremor-interceptor/src}/preprocessor/separate.rs (99%) rename {src => tremor-interceptor/src}/preprocessor/textual_length_prefixed.rs (98%) diff --git a/Cargo.lock b/Cargo.lock index b0da1913e8..49f8efedac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6077,6 +6077,7 @@ dependencies = [ "tremor-api", "tremor-codec", "tremor-common", + "tremor-interceptor", "tremor-pipeline", "tremor-runtime", "tremor-script", @@ -6114,6 +6115,7 @@ dependencies = [ "test-case", "tokio", "tremor-common", + "tremor-config", "tremor-influx", "tremor-value", "uuid", @@ -6137,6 +6139,15 @@ dependencies = [ "url", ] +[[package]] +name = "tremor-config" +version = "0.13.0-rc.16" +dependencies = [ + "serde", + "simd-json", + "tremor-value", +] + [[package]] name = "tremor-influx" version = "0.13.0-rc.16" @@ -6149,6 +6160,30 @@ dependencies = [ "value-trait", ] +[[package]] +name = "tremor-interceptor" +version = "0.13.0-rc.16" +dependencies = [ + "byteorder", + "bytes", + "error-chain", + "libflate", + "log", + "lz4", + "memchr", + "proptest", + "rand 0.8.5", + "serde", + "simd-json", + "snap", + "tremor-common", + "tremor-config", + "tremor-value", + "value-trait", + "xz2", + "zstd 0.13.0", +] + [[package]] name = "tremor-kv" version = "0.6.1" @@ -6175,7 +6210,6 @@ dependencies = [ name = "tremor-pipeline" version = "0.13.0-rc.16" dependencies = [ - "base64 0.21.4", "beef", "criterion", "either", @@ -6197,6 +6231,7 @@ dependencies = [ "tempfile", "tokio", "tremor-common", + "tremor-config", "tremor-script", "tremor-value", "url", @@ -6222,8 +6257,6 @@ dependencies = [ "base64 0.21.4", "beef", "bimap", - "byteorder", - "bytes", "chrono", "chrono-tz", "clickhouse-rs", @@ -6254,11 +6287,8 @@ 
dependencies = [ "itoa 1.0.9", "lazy_static", "lexical", - "libflate", "log", - "lz4", "matches", - "memchr", "mime", "num_cpus", "pin-project-lite 0.2.13", @@ -6287,7 +6317,6 @@ dependencies = [ "simd-json-derive", "simdutf8", "sled", - "snap", "socket2 0.5.4", "tempfile", "test-case", @@ -6299,7 +6328,9 @@ dependencies = [ "tonic", "tremor-codec", "tremor-common", + "tremor-config", "tremor-influx", + "tremor-interceptor", "tremor-otelapis", "tremor-pipeline", "tremor-script", @@ -6309,7 +6340,6 @@ dependencies = [ "uuid", "value-trait", "xz2", - "zstd 0.12.4", ] [[package]] @@ -6317,7 +6347,6 @@ name = "tremor-script" version = "0.13.0-rc.16" dependencies = [ "atty", - "base64 0.21.4", "beef", "byteorder", "chrono", @@ -7242,6 +7271,15 @@ dependencies = [ "zstd-safe 6.0.6", ] +[[package]] +name = "zstd" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110" +dependencies = [ + "zstd-safe 7.0.0", +] + [[package]] name = "zstd-safe" version = "5.0.2+zstd.1.5.2" @@ -7262,6 +7300,15 @@ dependencies = [ "zstd-sys", ] +[[package]] +name = "zstd-safe" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e" +dependencies = [ + "zstd-sys", +] + [[package]] name = "zstd-sys" version = "2.0.9+zstd.1.5.5" diff --git a/Cargo.toml b/Cargo.toml index 2c19b53b21..d29346089e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ members = [ "tremor-codec", "tremor-common", "tremor-influx", + "tremor-interceptor", "tremor-pipeline", "tremor-script", "tremor-value", @@ -46,8 +47,6 @@ async-trait = "0.1" base64 = "0.21" beef = { version = "0.5", features = ["impl_serde"] } bimap = { version = "0.6", features = ["serde"] } -byteorder = "1" -bytes = "1.5" chrono = "0.4" chrono-tz = "0.8" @@ -78,10 +77,7 @@ http-types = "2.12" http-body = "0.4" indexmap = "2" 
lazy_static = "1" -libflate = "2" log = { version = "0.4", features = ["kv_unstable"] } -lz4 = "1.24.0" -memchr = "2.6" pin-project-lite = "0.2" rand = "0.8.5" regex = "1.9" @@ -89,17 +85,17 @@ serde = { version = "1", features = ["derive"] } serde_yaml = "0.9" simd-json = { version = "0.11", features = ["known-key"] } simd-json-derive = "0.11" -snap = "1" socket2 = { version = "0.5", features = ["all"] } tremor-common = { path = "tremor-common" } +tremor-config = { path = "tremor-config" } tremor-codec = { path = "tremor-codec" } tremor-influx = { path = "tremor-influx" } tremor-pipeline = { path = "tremor-pipeline" } tremor-script = { path = "tremor-script" } tremor-value = { path = "tremor-value" } +tremor-interceptor = { path = "tremor-interceptor" } url = "2.4" value-trait = "0.6" -zstd = "0.12" # blaster / blackhole# codecs diff --git a/depricated/common/mmap.rs b/depricated/common/mmap.rs index c60757113c..96de33b15c 100644 --- a/depricated/common/mmap.rs +++ b/depricated/common/mmap.rs @@ -20,7 +20,7 @@ use std::io; use std::io::Write; use std::ops::DerefMut; use std::path::Path; -use tremor_pipeline::ConfigImpl; + use tremor_script::prelude::*; pub(crate) trait Kv { @@ -115,7 +115,7 @@ impl Kv for Anon { } } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} #[derive(Deserialize, Debug, Clone)] pub(crate) struct Config { diff --git a/depricated/sink/amqp.rs b/depricated/sink/amqp.rs index 28b8411e01..774443443c 100644 --- a/depricated/sink/amqp.rs +++ b/depricated/sink/amqp.rs @@ -51,7 +51,7 @@ impl Config { } } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} /// Amqp offramp connector pub(crate) struct Amqp { diff --git a/depricated/sink/nats.rs b/depricated/sink/nats.rs index 90f9951049..5cfd6b2e82 100644 --- a/depricated/sink/nats.rs +++ b/depricated/sink/nats.rs @@ -110,7 +110,7 @@ impl Config { } } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} pub(crate) struct Nats { sink_url: 
TremorUrl, diff --git a/depricated/sink/newrelic.rs b/depricated/sink/newrelic.rs index 7f34ca751b..72962a5b86 100644 --- a/depricated/sink/newrelic.rs +++ b/depricated/sink/newrelic.rs @@ -55,7 +55,7 @@ pub(crate) enum Key { InsertKey(String), } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} #[derive(Debug, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] diff --git a/depricated/sink/postgres.rs b/depricated/sink/postgres.rs index fe9bb167b1..4595417c02 100644 --- a/depricated/sink/postgres.rs +++ b/depricated/sink/postgres.rs @@ -42,7 +42,7 @@ pub(crate) struct Config { pub(crate) table: String, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} pub(crate) struct Builder {} impl offramp::Builder for Builder { diff --git a/depricated/source/amqp.rs b/depricated/source/amqp.rs index 8b51394eff..8ffd6ea1ed 100644 --- a/depricated/source/amqp.rs +++ b/depricated/source/amqp.rs @@ -41,7 +41,7 @@ pub(crate) struct Config { exchange: String, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} pub(crate) struct Amqp { pub(crate) config: Config, diff --git a/depricated/source/nats.rs b/depricated/source/nats.rs index ce0fa089df..841bf2e2e8 100644 --- a/depricated/source/nats.rs +++ b/depricated/source/nats.rs @@ -31,7 +31,7 @@ pub(crate) struct Config { pub(crate) options: ConnectOptions, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} impl Config { async fn connection(&self) -> Result { diff --git a/depricated/source/postgres.rs b/depricated/source/postgres.rs index 1a8d215075..e50edfa3b8 100644 --- a/depricated/source/postgres.rs +++ b/depricated/source/postgres.rs @@ -43,7 +43,7 @@ pub(crate) struct Config { pub(crate) cache: CacheConfig, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} pub(crate) struct Postgres { onramp_id: TremorUrl, diff --git a/depricated/source/sse.rs b/depricated/source/sse.rs index d0b8450358..20d7e61d56 100644 --- 
a/depricated/source/sse.rs +++ b/depricated/source/sse.rs @@ -27,7 +27,7 @@ pub(crate) struct Config { #[serde(default)] pub(crate) headers: HashMap, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} struct MiddlewareHeader { header: HashMap, diff --git a/src/config.rs b/src/config.rs index 4f5869a61c..2a470ccd16 100644 --- a/src/config.rs +++ b/src/config.rs @@ -14,8 +14,8 @@ use crate::connectors::prelude::*; use simd_json::ValueType; -#[allow(clippy::module_name_repetitions)] -pub use tremor_codec::config::{Codec, NameWithConfig, Postprocessor, Preprocessor}; +use tremor_common::alias; +use tremor_interceptor::{postprocessor, preprocessor}; use tremor_script::{ ast::deploy::ConnectorDefinition, ast::{self, Helper}, @@ -87,18 +87,18 @@ pub(crate) struct Connector { pub connector_type: ConnectorType, /// Codec in force for connector - pub codec: Option, + pub codec: Option, /// Configuration map - pub config: tremor_pipeline::ConfigMap, + pub config: tremor_config::Map, // TODO: interceptors or configurable processors /// Preprocessor chain configuration - pub preprocessors: Option>, + pub preprocessors: Option>, // TODO: interceptors or configurable processors /// Postprocessor chain configuration - pub postprocessors: Option>, + pub postprocessors: Option>, pub(crate) reconnect: Reconnect, @@ -109,7 +109,7 @@ pub(crate) struct Connector { impl Connector { /// Spawns a connector from a definition pub(crate) fn from_defn( - alias: &Alias, + alias: &alias::Connector, defn: &ast::ConnectorDefinition<'static>, ) -> crate::Result { let aggr_reg = tremor_script::registry::aggr(); @@ -125,11 +125,16 @@ impl Connector { /// Creates a connector from it's definition (aka config + settings) #[allow(clippy::too_many_lines)] pub(crate) fn from_config( - connector_alias: &Alias, + connector_alias: &alias::Connector, connector_type: ConnectorType, connector_config: &Value<'static>, ) -> crate::Result { - fn validate_type(v: &Value, k: &str, t: ValueType, 
connector_alias: &Alias) -> Result<()> { + fn validate_type( + v: &Value, + k: &str, + t: ValueType, + connector_alias: &alias::Connector, + ) -> Result<()> { if v.get(k).is_some() && v.get(k).map(Value::value_type) != Some(t) { return Err(ErrorKind::InvalidConnectorDefinition( connector_alias.to_string(), @@ -206,7 +211,7 @@ impl Connector { .get_array(ConnectorDefinition::PREPROCESSORS) .map(|o| { o.iter() - .map(Preprocessor::try_from) + .map(preprocessor::Config::try_from) .collect::>() }) .transpose()?, @@ -214,7 +219,7 @@ impl Connector { .get_array(ConnectorDefinition::POSTPROCESSORS) .map(|o| { o.iter() - .map(Preprocessor::try_from) + .map(preprocessor::Config::try_from) .collect::>() }) .transpose()?, @@ -227,7 +232,7 @@ impl Connector { metrics_interval_s: connector_config.get_u64(ConnectorDefinition::METRICS_INTERVAL_S), codec: connector_config .get(ConnectorDefinition::CODEC) - .map(Codec::try_from) + .map(tremor_codec::Config::try_from) .transpose()?, }) } @@ -247,7 +252,7 @@ pub struct Binding { #[cfg(test)] mod tests { use super::*; - use crate::{errors::Result, system::flow}; + use crate::errors::Result; #[test] fn test_reconnect_serde() -> Result<()> { @@ -276,7 +281,7 @@ mod tests { #[test] fn test_config_builtin_preproc_with_config() -> Result<()> { let c = Connector::from_config( - &Alias::new("flow", "my_otel_client"), + &alias::Connector::new("flow", "my_otel_client"), ConnectorType::from("otel_client".to_string()), &literal!({ "preprocessors": [ {"name": "snot", "config": { "separator": "\n" }}], @@ -307,7 +312,7 @@ mod tests { "reconnect": {}, "metrics_interval_s": "wrong_type" }); - let id = Alias::new(flow::Alias::new("flow"), "my_id"); + let id = alias::Connector::new(tremor_common::alias::Flow::new("flow"), "my_id"); let res = Connector::from_config(&id, "fancy_schmancy".into(), &config); assert!(res.is_err()); assert_eq!(String::from("Invalid Definition for connector \"flow::my_id\": Expected type I64 for key metrics_interval_s but got 
String"), res.err().map(|e| e.to_string()).unwrap_or_default()); diff --git a/src/connectors.rs b/src/connectors.rs index 4a409b23c2..e7f83595c5 100644 --- a/src/connectors.rs +++ b/src/connectors.rs @@ -39,7 +39,7 @@ use crate::{ errors::{connector_send_err, Error, Kind as ErrorKind, Result}, instance::State, log_error, pipeline, qsize, - system::{flow, KillSwitch, World}, + system::{KillSwitch, World}, }; use beef::Cow; use futures::Future; @@ -47,6 +47,7 @@ use halfbrown::HashMap; use std::{fmt::Display, time::Duration}; use tokio::task::{self, JoinHandle}; use tremor_common::{ + alias, ids::{ConnectorId, ConnectorIdGen, SourceId}, ports::{Port, ERR, IN, OUT}, }; @@ -54,10 +55,9 @@ use tremor_pipeline::METRICS_CHANNEL; use tremor_script::ast::DeployEndpoint; use tremor_value::Value; use utils::reconnect::{Attempt, ConnectionLostNotifier, ReconnectRuntime}; -use value_trait::{Builder, Mutable, ValueAccess}; - /// quiescence stuff pub(crate) use utils::{metrics, reconnect}; +use value_trait::{Builder, Mutable, ValueAccess}; /// Accept timeout pub(crate) const ACCEPT_TIMEOUT: Duration = Duration::from_millis(100); @@ -66,7 +66,7 @@ pub(crate) const ACCEPT_TIMEOUT: Duration = Duration::from_millis(100); #[derive(Clone, Debug)] pub struct Addr { /// connector instance alias - pub(crate) alias: Alias, + pub(crate) alias: alias::Connector, sender: Sender, source: Option, pub(crate) sink: Option, @@ -224,7 +224,7 @@ pub(crate) enum Msg { /// bears a `url` to identify the connector who finished the operation pub(crate) struct ConnectorResult { /// the connector alias - pub(crate) alias: Alias, + pub(crate) alias: alias::Connector, /// the actual result pub(crate) res: Result, } @@ -248,7 +248,7 @@ impl ConnectorResult<()> { /// context for a Connector or its parts pub(crate) trait Context: Display + Clone { /// provide the alias of the connector - fn alias(&self) -> &Alias; + fn alias(&self) -> &alias::Connector; /// get the quiescence beacon for checking if we should 
continue reading/writing fn quiescence_beacon(&self) -> &QuiescenceBeacon; @@ -315,7 +315,7 @@ pub(crate) trait Context: Display + Clone { #[derive(Clone)] pub(crate) struct ConnectorContext { /// alias of the connector instance - pub(crate) alias: Alias, + pub(crate) alias: alias::Connector, /// type of the connector connector_type: ConnectorType, /// The Quiescence Beacon @@ -331,7 +331,7 @@ impl Display for ConnectorContext { } impl Context for ConnectorContext { - fn alias(&self) -> &Alias { + fn alias(&self) -> &alias::Connector { &self.alias } @@ -352,7 +352,7 @@ impl Context for ConnectorContext { #[derive(Debug, Serialize)] pub struct StatusReport { /// connector alias - pub(crate) alias: Alias, + pub(crate) alias: alias::Connector, /// state of the connector pub(crate) status: State, /// current connectivity @@ -364,7 +364,7 @@ pub struct StatusReport { impl StatusReport { /// the connector alias #[must_use] - pub fn alias(&self) -> &Alias { + pub fn alias(&self) -> &alias::Connector { &self.alias } @@ -421,7 +421,7 @@ pub(crate) type Known = /// # Errors /// if the connector can not be built or the config is invalid pub(crate) async fn spawn( - alias: &Alias, + alias: &alias::Connector, connector_id_gen: &mut ConnectorIdGen, builder: &dyn ConnectorBuilder, config: ConnectorConfig, @@ -437,7 +437,7 @@ pub(crate) async fn spawn( #[allow(clippy::too_many_lines)] // instantiates the connector and starts listening for control plane messages async fn connector_task( - alias: Alias, + alias: alias::Connector, mut connector: Box, config: ConnectorConfig, uid: ConnectorId, @@ -894,7 +894,7 @@ enum DrainState { struct Drainage { tx: Sender>, - alias: Alias, + alias: alias::Connector, source_drained: DrainState, sink_drained: DrainState, } @@ -1124,41 +1124,6 @@ where } } -/// unique instance alias/id of a connector within a deployment -#[derive(Debug, PartialEq, PartialOrd, Eq, Hash, Clone, Serialize, Deserialize)] -pub struct Alias { - flow_alias: flow::Alias, - 
connector_alias: String, -} - -impl Alias { - /// construct a new `ConnectorId` from the id of the containing flow and the connector instance id - pub fn new(flow_alias: impl Into, connector_alias: impl Into) -> Self { - Self { - flow_alias: flow_alias.into(), - connector_alias: connector_alias.into(), - } - } - - /// get a reference to the flow alias - #[must_use] - pub fn flow_alias(&self) -> &flow::Alias { - &self.flow_alias - } - - /// get a reference to the connector alias - #[must_use] - pub fn connector_alias(&self) -> &str { - self.connector_alias.as_str() - } -} - -impl std::fmt::Display for Alias { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}::{}", self.flow_alias, self.connector_alias) - } -} - /// something that is able to create a connector instance #[async_trait::async_trait] pub(crate) trait ConnectorBuilder: Sync + Send + std::fmt::Debug { @@ -1172,7 +1137,7 @@ pub(crate) trait ConnectorBuilder: Sync + Send + std::fmt::Debug { /// * If the config is invalid for the connector async fn build( &self, - alias: &Alias, + alias: &alias::Connector, config: &ConnectorConfig, kill_switch: &KillSwitch, ) -> Result> { @@ -1189,7 +1154,7 @@ pub(crate) trait ConnectorBuilder: Sync + Send + std::fmt::Debug { /// * If the config is invalid for the connector async fn build_cfg( &self, - _alias: &Alias, + _alias: &alias::Connector, _config: &ConnectorConfig, _connector_config: &Value, _kill_switch: &KillSwitch, @@ -1294,14 +1259,12 @@ where #[cfg(test)] pub(crate) mod unit_tests { - use crate::system::flow; - use super::*; #[derive(Clone)] pub(crate) struct FakeContext { t: ConnectorType, - alias: Alias, + alias: alias::Connector, notifier: reconnect::ConnectionLostNotifier, beacon: QuiescenceBeacon, } @@ -1310,7 +1273,7 @@ pub(crate) mod unit_tests { pub(crate) fn new(tx: Sender) -> Self { Self { t: ConnectorType::from("snot"), - alias: Alias::new(flow::Alias::new("fake"), "fake"), + alias: 
alias::Connector::new(alias::Flow::new("fake"), "fake"), notifier: reconnect::ConnectionLostNotifier::new(tx), beacon: QuiescenceBeacon::default(), } @@ -1324,7 +1287,7 @@ pub(crate) mod unit_tests { } impl Context for FakeContext { - fn alias(&self) -> &Alias { + fn alias(&self) -> &alias::Connector { &self.alias } diff --git a/src/connectors/impls/bench.rs b/src/connectors/impls/bench.rs index caa37bacfe..4ef6a3fca7 100644 --- a/src/connectors/impls/bench.rs +++ b/src/connectors/impls/bench.rs @@ -202,7 +202,6 @@ use crate::{ connectors::prelude::*, system::{KillSwitch, ShutdownMode}, }; -use base64::Engine; use hdrhistogram::Histogram; use std::{ cmp::min, @@ -210,7 +209,11 @@ use std::{ io::{stdout, BufRead as StdBufRead, BufReader, Read, Write}, time::Duration, }; -use tremor_common::{base64::BASE64, file, time::nanotime}; +use tremor_common::{ + base64::{Engine, BASE64}, + file, + time::nanotime, +}; use xz2::read::XzDecoder; #[derive(Deserialize, Debug, Clone)] @@ -252,7 +255,7 @@ fn default_significant_figures() -> u8 { 2 } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} #[derive(Debug, Default)] pub(crate) struct Builder {} @@ -270,7 +273,7 @@ fn decode>(base64: bool, data: T) -> Result> { impl ConnectorBuilder for Builder { async fn build_cfg( &self, - id: &Alias, + id: &alias::Connector, _: &ConnectorConfig, config: &Value, kill_switch: &KillSwitch, diff --git a/src/connectors/impls/cb.rs b/src/connectors/impls/cb.rs index 38f1271c47..3f82d9b2be 100644 --- a/src/connectors/impls/cb.rs +++ b/src/connectors/impls/cb.rs @@ -139,7 +139,7 @@ fn default_timeout() -> u64 { 10_000_000_000 } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} #[derive(Debug, Default)] pub(crate) struct Builder {} @@ -152,7 +152,7 @@ impl ConnectorBuilder for Builder { async fn build_cfg( &self, - _: &Alias, + _: &alias::Connector, _: &ConnectorConfig, raw: &Value, kill_switch: &KillSwitch, @@ -332,7 +332,11 @@ impl CbSource { }; 
self.finished && all_received } - async fn new(config: &Config, _alias: &Alias, kill_switch: KillSwitch) -> Result { + async fn new( + config: &Config, + _alias: &alias::Connector, + kill_switch: KillSwitch, + ) -> Result { let mut files = vec![]; for path in &config.paths { let file = open(path).await?; diff --git a/src/connectors/impls/clickhouse.rs b/src/connectors/impls/clickhouse.rs index abda49bc6c..77ed8619ac 100644 --- a/src/connectors/impls/clickhouse.rs +++ b/src/connectors/impls/clickhouse.rs @@ -303,7 +303,7 @@ impl ConnectorBuilder for Builder { async fn build_cfg( &self, - _alias: &Alias, + _alias: &alias::Connector, _config: &ConnectorConfig, connector_config: &Value, _kill_switch: &KillSwitch, @@ -378,7 +378,7 @@ impl Defaults for ClickHouseDefaults { const PORT: u16 = 9000; } -impl ConfigImpl for ClickhouseConfig {} +impl tremor_config::Impl for ClickhouseConfig {} #[derive(Clone, Copy, Deserialize, Default)] #[serde(rename_all = "snake_case")] diff --git a/src/connectors/impls/crononome.rs b/src/connectors/impls/crononome.rs index 4de30441ea..b863ba7e59 100644 --- a/src/connectors/impls/crononome.rs +++ b/src/connectors/impls/crononome.rs @@ -127,7 +127,7 @@ pub(crate) struct Config { pub(crate) entries: Option, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} #[derive(Debug, Default)] pub(crate) struct Builder {} @@ -140,7 +140,7 @@ impl ConnectorBuilder for Builder { async fn build_cfg( &self, - id: &Alias, + id: &alias::Connector, _: &ConnectorConfig, raw: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/discord.rs b/src/connectors/impls/discord.rs index 19e2d14a29..3c49afbf16 100644 --- a/src/connectors/impls/discord.rs +++ b/src/connectors/impls/discord.rs @@ -58,7 +58,7 @@ pub(crate) struct Config { pub intents: Vec, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} #[derive(Debug, Default)] pub(crate) struct Builder {} @@ -70,7 +70,7 @@ impl ConnectorBuilder for Builder { } 
async fn build_cfg( &self, - _: &Alias, + _: &alias::Connector, _: &ConnectorConfig, config: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/dns/client.rs b/src/connectors/impls/dns/client.rs index 348d441d7d..c70ca95a46 100644 --- a/src/connectors/impls/dns/client.rs +++ b/src/connectors/impls/dns/client.rs @@ -33,7 +33,7 @@ impl ConnectorBuilder for Builder { async fn build( &self, - _id: &Alias, + _id: &alias::Connector, _raw_config: &ConnectorConfig, _kill_switch: &KillSwitch, ) -> Result> { diff --git a/src/connectors/impls/elastic.rs b/src/connectors/impls/elastic.rs index ce9f48892b..90f4db0d8d 100644 --- a/src/connectors/impls/elastic.rs +++ b/src/connectors/impls/elastic.rs @@ -293,7 +293,7 @@ pub(crate) struct Config { #[serde(default = "Default::default")] timeout: Option, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} const DEFAULT_CONCURRENCY: usize = 4; @@ -311,7 +311,7 @@ impl ConnectorBuilder for Builder { async fn build_cfg( &self, - id: &Alias, + id: &alias::Connector, _: &ConnectorConfig, raw_config: &Value, _kill_switch: &KillSwitch, @@ -1172,10 +1172,9 @@ impl CertValidation { } #[cfg(test)] mod tests { - use elasticsearch::http::request::Body; - use super::*; use crate::config::Connector as ConnectorConfig; + use elasticsearch::http::request::Body; #[tokio::test(flavor = "multi_thread")] async fn connector_builder_empty_nodes() -> Result<()> { @@ -1184,7 +1183,7 @@ mod tests { "nodes": [] } }); - let alias = Alias::new("flow", "my_elastic"); + let alias = alias::Connector::new("flow", "my_elastic"); let builder = super::Builder::default(); let connector_config = ConnectorConfig::from_config(&alias, builder.connector_type(), &config)?; @@ -1213,7 +1212,7 @@ mod tests { ] } }); - let alias = Alias::new("snot", "my_elastic"); + let alias = alias::Connector::new("snot", "my_elastic"); let builder = super::Builder::default(); let connector_config = ConnectorConfig::from_config(&alias, 
builder.connector_type(), &config)?; diff --git a/src/connectors/impls/exit.rs b/src/connectors/impls/exit.rs index adaf40efdd..e00891f115 100644 --- a/src/connectors/impls/exit.rs +++ b/src/connectors/impls/exit.rs @@ -106,13 +106,13 @@ pub(crate) struct Config { graceful: bool, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} impl Default for Config { fn default() -> Self { Self { delay: None, - graceful: default_true(), + graceful: true, } } } @@ -128,7 +128,7 @@ impl ConnectorBuilder for Builder { async fn build( &self, - _id: &Alias, + _id: &alias::Connector, config: &ConnectorConfig, kill_switch: &KillSwitch, ) -> Result> { diff --git a/src/connectors/impls/file.rs b/src/connectors/impls/file.rs index 2b116371a0..c8d6690296 100644 --- a/src/connectors/impls/file.rs +++ b/src/connectors/impls/file.rs @@ -196,7 +196,7 @@ pub(crate) struct Config { pub(crate) chunk_size: usize, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} /// file connector pub(crate) struct File { @@ -217,7 +217,7 @@ impl ConnectorBuilder for Builder { async fn build_cfg( &self, - _: &Alias, + _: &alias::Connector, _: &ConnectorConfig, config: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/gbq/writer.rs b/src/connectors/impls/gbq/writer.rs index d819171949..6f7b08b1f3 100644 --- a/src/connectors/impls/gbq/writer.rs +++ b/src/connectors/impls/gbq/writer.rs @@ -19,7 +19,6 @@ use crate::connectors::impls::gbq::writer::sink::{GbqSink, TonicChannelFactory}; use crate::connectors::prelude::*; use crate::connectors::{Connector, ConnectorBuilder, ConnectorConfig, ConnectorType}; use serde::Deserialize; -use tremor_pipeline::ConfigImpl; #[derive(Deserialize, Clone)] pub(crate) struct Config { @@ -31,7 +30,7 @@ pub(crate) struct Config { #[serde(default = "default_request_size_limit")] pub request_size_limit: usize, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} fn default_request_size_limit() -> usize { 
// 10MB @@ -73,7 +72,7 @@ impl ConnectorBuilder for Builder { async fn build_cfg( &self, - _: &Alias, + _: &alias::Connector, _: &ConnectorConfig, config: &Value, _kill_switch: &KillSwitch, @@ -109,7 +108,7 @@ mod tests { .create_sink( SinkContext::new( SinkId::default(), - Alias::new("a", "b"), + alias::Connector::new("a", "b"), ConnectorType::default(), QuiescenceBeacon::default(), ConnectionLostNotifier::new(crate::channel::bounded(128).0), @@ -117,8 +116,12 @@ mod tests { builder( &ConnectorConfig::default(), CodecReq::Structured, - &Alias::new("a", "b"), - SinkReporter::new(Alias::new("a", "b"), broadcast::channel(1).0, None), + &alias::Connector::new("a", "b"), + SinkReporter::new( + alias::Connector::new("a", "b"), + broadcast::channel(1).0, + None, + ), )?, ) .await?; diff --git a/src/connectors/impls/gbq/writer/sink.rs b/src/connectors/impls/gbq/writer/sink.rs index eb6a692c8a..e88a168610 100644 --- a/src/connectors/impls/gbq/writer/sink.rs +++ b/src/connectors/impls/gbq/writer/sink.rs @@ -754,7 +754,7 @@ mod test { }], &SinkContext::new( SinkId::default(), - Alias::new("flow", "connector"), + alias::Connector::new("flow", "connector"), ConnectorType::default(), QuiescenceBeacon::default(), ConnectionLostNotifier::new(rx), @@ -783,7 +783,7 @@ mod test { }], &SinkContext::new( SinkId::default(), - Alias::new("flow", "connector"), + alias::Connector::new("flow", "connector"), ConnectorType::default(), QuiescenceBeacon::default(), ConnectionLostNotifier::new(rx), @@ -821,7 +821,7 @@ mod test { }], &SinkContext::new( SinkId::default(), - Alias::new("flow", "connector"), + alias::Connector::new("flow", "connector"), ConnectorType::default(), QuiescenceBeacon::default(), ConnectionLostNotifier::new(rx), @@ -861,7 +861,7 @@ mod test { }], &SinkContext::new( SinkId::default(), - Alias::new("flow", "connector"), + alias::Connector::new("flow", "connector"), ConnectorType::default(), QuiescenceBeacon::default(), ConnectionLostNotifier::new(rx), @@ -1084,7 +1084,7 @@ 
mod test { let ctx = SinkContext::new( SinkId::default(), - Alias::new("flow", "connector"), + alias::Connector::new("flow", "connector"), ConnectorType::default(), QuiescenceBeacon::default(), ConnectionLostNotifier::new(rx), @@ -1133,7 +1133,7 @@ mod test { let ctx = SinkContext::new( SinkId::default(), - Alias::new("flow", "connector"), + alias::Connector::new("flow", "connector"), ConnectorType::default(), QuiescenceBeacon::default(), ConnectionLostNotifier::new(rx), @@ -1178,7 +1178,7 @@ mod test { let ctx = SinkContext::new( SinkId::default(), - Alias::new("flow", "connector"), + alias::Connector::new("flow", "connector"), ConnectorType::default(), QuiescenceBeacon::default(), ConnectionLostNotifier::new(rx), @@ -1224,7 +1224,7 @@ mod test { let ctx = SinkContext::new( SinkId::default(), - Alias::new("flow", "connector"), + alias::Connector::new("flow", "connector"), ConnectorType::default(), QuiescenceBeacon::default(), ConnectionLostNotifier::new(rx), @@ -1268,7 +1268,7 @@ mod test { let ctx = SinkContext::new( SinkId::default(), - Alias::new("flow", "connector"), + alias::Connector::new("flow", "connector"), ConnectorType::default(), QuiescenceBeacon::default(), ConnectionLostNotifier::new(rx), @@ -1303,7 +1303,7 @@ mod test { let ctx = SinkContext::new( SinkId::default(), - Alias::new("flow", "connector"), + alias::Connector::new("flow", "connector"), ConnectorType::default(), QuiescenceBeacon::default(), ConnectionLostNotifier::new(rx), @@ -1364,7 +1364,7 @@ mod test { Event::signal_tick(), &SinkContext::new( SinkId::default(), - Alias::new("flow", "connector"), + alias::Connector::new("flow", "connector"), ConnectorType::default(), QuiescenceBeacon::default(), ConnectionLostNotifier::new(rx), @@ -1374,7 +1374,7 @@ mod test { CodecReq::Structured, vec![], &ConnectorType::from(""), - &Alias::new("flow", "connector"), + &alias::Connector::new("flow", "connector"), )?, 0, ) @@ -1407,7 +1407,7 @@ mod test { Event::signal_tick(), &SinkContext::new( 
SinkId::default(), - Alias::new("flow", "connector"), + alias::Connector::new("flow", "connector"), ConnectorType::default(), QuiescenceBeacon::default(), ConnectionLostNotifier::new(rx), @@ -1417,7 +1417,7 @@ mod test { CodecReq::Structured, vec![], &ConnectorType::from(""), - &Alias::new("flow", "connector"), + &alias::Connector::new("flow", "connector"), )?, 0, ) @@ -1493,7 +1493,7 @@ mod test { let ctx = SinkContext::new( SinkId::default(), - Alias::new("flow", "connector"), + alias::Connector::new("flow", "connector"), ConnectorType::default(), QuiescenceBeacon::default(), ConnectionLostNotifier::new(crate::channel::bounded(1024).0), @@ -1519,7 +1519,7 @@ mod test { CodecReq::Structured, vec![], &ConnectorType::from(""), - &Alias::new("flow", "connector"), + &alias::Connector::new("flow", "connector"), )?, 0, ) @@ -1583,7 +1583,7 @@ mod test { let ctx = SinkContext::new( SinkId::default(), - Alias::new("flow", "connector"), + alias::Connector::new("flow", "connector"), ConnectorType::default(), QuiescenceBeacon::default(), ConnectionLostNotifier::new(crate::channel::bounded(1024).0), @@ -1626,7 +1626,7 @@ mod test { CodecReq::Structured, vec![], &ConnectorType::from(""), - &Alias::new("flow", "connector"), + &alias::Connector::new("flow", "connector"), )?, 0, ) diff --git a/src/connectors/impls/gcl/writer.rs b/src/connectors/impls/gcl/writer.rs index 663641b9d5..ea560ba293 100644 --- a/src/connectors/impls/gcl/writer.rs +++ b/src/connectors/impls/gcl/writer.rs @@ -18,7 +18,6 @@ mod sink; use crate::connectors::google::{GouthTokenProvider, TokenSrc}; use crate::connectors::impls::gcl::writer::sink::{GclSink, TonicChannelFactory}; use crate::connectors::prelude::*; -use crate::connectors::{Alias, Connector, ConnectorBuilder, ConnectorConfig, ConnectorType}; use crate::errors::Error; use googapis::google::api::MonitoredResource; use googapis::google::logging::r#type::LogSeverity; @@ -26,7 +25,7 @@ use serde::Deserialize; use simd_json::OwnedValue; use 
std::collections::HashMap; use tonic::transport::Channel; -use tremor_pipeline::ConfigImpl; +use tremor_common::alias; #[derive(Deserialize, Clone)] #[serde(deny_unknown_fields)] @@ -185,7 +184,7 @@ impl Config { } } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} fn value_to_monitored_resource( from: Option<&simd_json::OwnedValue>, @@ -268,7 +267,7 @@ impl ConnectorBuilder for Builder { async fn build_cfg( &self, - _id: &Alias, + _id: &alias::Connector, _: &ConnectorConfig, raw: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/gcl/writer/meta.rs b/src/connectors/impls/gcl/writer/meta.rs index 039bc0ad85..8926431cd7 100644 --- a/src/connectors/impls/gcl/writer/meta.rs +++ b/src/connectors/impls/gcl/writer/meta.rs @@ -162,7 +162,8 @@ mod test { use crate::connectors::impls::gcl::writer::default_log_severity; use googapis::google::logging::r#type::LogSeverity; use std::collections::HashMap as StdHashMap; - use tremor_pipeline::ConfigImpl; + use tremor_config::Impl; + use tremor_value::literal; use tremor_value::structurize; @@ -357,11 +358,10 @@ mod test { ))?; assert_eq!(1, ok_config.labels.len()); - let ko_config: std::result::Result = - Config::new(&literal!({ - "token": {"file": file!().to_string()}, - "labels": "snot" - })); + let ko_config: std::result::Result = Config::new(&literal!({ + "token": {"file": file!().to_string()}, + "labels": "snot" + })); assert!(ko_config.is_err()); Ok(()) diff --git a/src/connectors/impls/gcl/writer/sink.rs b/src/connectors/impls/gcl/writer/sink.rs index b877c68ca4..7b1213612d 100644 --- a/src/connectors/impls/gcl/writer/sink.rs +++ b/src/connectors/impls/gcl/writer/sink.rs @@ -362,7 +362,7 @@ mod test { ); let ctx = SinkContext::new( SinkId::default(), - Alias::new("a", "b"), + alias::Connector::new("a", "b"), ConnectorType::default(), QuiescenceBeacon::default(), ConnectionLostNotifier::new(connection_lost_tx), @@ -391,7 +391,7 @@ mod test { CodecReq::Structured, vec![], &"a".into(), 
- &Alias::new("a", "b"), + &alias::Connector::new("a", "b"), )?, 0, ) @@ -458,7 +458,7 @@ mod test { Event::signal_tick(), &SinkContext::new( SinkId::default(), - Alias::new("", ""), + alias::Connector::new("", ""), ConnectorType::default(), QuiescenceBeacon::default(), ConnectionLostNotifier::new(rx), @@ -468,7 +468,7 @@ mod test { CodecReq::Structured, vec![], &ConnectorType::from(""), - &Alias::new("", ""), + &alias::Connector::new("", ""), )?, 0, ) diff --git a/src/connectors/impls/gcs/streamer.rs b/src/connectors/impls/gcs/streamer.rs index 80b54bd28a..7088227aa2 100644 --- a/src/connectors/impls/gcs/streamer.rs +++ b/src/connectors/impls/gcs/streamer.rs @@ -33,7 +33,7 @@ use crate::{ }; use std::time::Duration; use tremor_common::time::nanotime; -use tremor_pipeline::{ConfigImpl, EventId, OpMeta}; +use tremor_pipeline::{EventId, OpMeta}; use tremor_value::Value; const CONNECTOR_TYPE: &str = "gcs_streamer"; @@ -82,10 +82,10 @@ fn default_backoff_base_time() -> u64 { 25_000_000 } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} impl Config { - fn normalize(&mut self, alias: &Alias) { + fn normalize(&mut self, alias: &alias::Connector) { let buffer_size = next_multiple_of(self.buffer_size, 256 * 1024); if self.buffer_size < buffer_size { @@ -117,7 +117,7 @@ impl ConnectorBuilder for Builder { async fn build_cfg( &self, - alias: &Alias, + alias: &alias::Connector, _config: &ConnectorConfig, connector_config: &Value, _kill_switch: &KillSwitch, @@ -404,7 +404,7 @@ pub(crate) mod tests { use super::*; use crate::{ channel::{bounded, unbounded}, - config::{Codec as CodecConfig, Reconnect}, + config::Reconnect, connectors::{ impls::gcs::{resumable_upload_client::ResumableUploadClient, streamer::Mode}, reconnect::ConnectionLostNotifier, @@ -529,7 +529,7 @@ pub(crate) mod tests { }); let mut config = Config::new(&raw_config).expect("config should be valid"); - let alias = Alias::new("flow", "conn"); + let alias = alias::Connector::new("flow", 
"conn"); config.normalize(&alias); assert_eq!(256 * 1024, config.buffer_size); } @@ -570,7 +570,7 @@ pub(crate) mod tests { let (connection_lost_tx, _) = bounded(10); - let alias = Alias::new("a", "b"); + let alias = alias::Connector::new("a", "b"); let context = SinkContext::new( SinkId::default(), alias.clone(), @@ -579,7 +579,7 @@ pub(crate) mod tests { ConnectionLostNotifier::new(connection_lost_tx), ); let mut serializer = EventSerializer::new( - Some(CodecConfig::from("json")), + Some(tremor_codec::Config::from("json")), CodecReq::Required, vec![], &"gcs_streamer".into(), @@ -756,7 +756,7 @@ pub(crate) mod tests { let (connection_lost_tx, _) = bounded(10); - let alias = Alias::new("a", "b"); + let alias = alias::Connector::new("a", "b"); let context = SinkContext::new( SinkId::default(), alias.clone(), @@ -765,7 +765,7 @@ pub(crate) mod tests { ConnectionLostNotifier::new(connection_lost_tx), ); let mut serializer = EventSerializer::new( - Some(CodecConfig::from("json")), + Some(tremor_codec::Config::from("json")), CodecReq::Required, vec![], &"gcs_streamer".into(), @@ -930,7 +930,7 @@ pub(crate) mod tests { let (connection_lost_tx, _) = bounded(10); - let alias = Alias::new("a", "b"); + let alias = alias::Connector::new("a", "b"); let context = SinkContext::new( SinkId::default(), alias.clone(), @@ -939,7 +939,7 @@ pub(crate) mod tests { ConnectionLostNotifier::new(connection_lost_tx), ); let mut serializer = EventSerializer::new( - Some(CodecConfig::from("json")), + Some(tremor_codec::Config::from("json")), CodecReq::Required, vec![], &"gcs_streamer".into(), @@ -1025,7 +1025,7 @@ pub(crate) mod tests { let (connection_lost_tx, _) = bounded(10); - let alias = Alias::new("a", "b"); + let alias = alias::Connector::new("a", "b"); let context = SinkContext::new( SinkId::default(), alias.clone(), @@ -1079,7 +1079,7 @@ pub(crate) mod tests { let (connection_lost_tx, _) = bounded(10); - let alias = Alias::new("a", "b"); + let alias = alias::Connector::new("a", 
"b"); let context = SinkContext::new( SinkId::default(), alias.clone(), @@ -1088,7 +1088,7 @@ pub(crate) mod tests { ConnectionLostNotifier::new(connection_lost_tx), ); let mut serializer = EventSerializer::new( - Some(CodecConfig::from("json")), + Some(tremor_codec::Config::from("json")), CodecReq::Required, vec![], &"gcs_streamer".into(), @@ -1304,7 +1304,7 @@ pub(crate) mod tests { let (connection_lost_tx, _) = bounded(10); - let alias = Alias::new("a", "b"); + let alias = alias::Connector::new("a", "b"); let context = SinkContext::new( SinkId::default(), alias.clone(), @@ -1313,7 +1313,7 @@ pub(crate) mod tests { ConnectionLostNotifier::new(connection_lost_tx), ); let mut serializer = EventSerializer::new( - Some(CodecConfig::from("json")), + Some(tremor_codec::Config::from("json")), CodecReq::Required, vec![], &"gcs_streamer".into(), @@ -1453,7 +1453,7 @@ pub(crate) mod tests { metrics_interval_s: None, }; let kill_switch = KillSwitch::dummy(); - let alias = Alias::new("snot", "badger"); + let alias = alias::Connector::new("snot", "badger"); let mut connector_id_gen = ConnectorIdGen::default(); // lets cover create-sink here @@ -1484,7 +1484,7 @@ pub(crate) mod tests { metrics_interval_s: None, }; let kill_switch = KillSwitch::dummy(); - let alias = Alias::new("snot", "badger"); + let alias = alias::Connector::new("snot", "badger"); let mut connector_id_gen = ConnectorIdGen::default(); // lets cover create-sink here diff --git a/src/connectors/impls/gpubsub/consumer.rs b/src/connectors/impls/gpubsub/consumer.rs index 9ce11f6a0d..d586b8328d 100644 --- a/src/connectors/impls/gpubsub/consumer.rs +++ b/src/connectors/impls/gpubsub/consumer.rs @@ -36,7 +36,6 @@ use tonic::codegen::InterceptedService; use tonic::transport::{Certificate, Channel, ClientTlsConfig}; use tonic::{Code, Status}; use tremor_common::blue_green_hashmap::BlueGreenHashMap; -use tremor_pipeline::ConfigImpl; // controlling retries upon gpubsub returning `Unavailable` from StreamingPull // this 
in on purpose not exposed via config as this should remain an internal thing @@ -59,7 +58,7 @@ struct Config { #[serde(default = "crate::connectors::impls::gpubsub::default_endpoint")] pub url: Url, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} fn default_ack_deadline() -> u64 { 10_000_000_000u64 // 10 seconds @@ -91,7 +90,7 @@ impl ConnectorBuilder for Builder { async fn build_cfg( &self, - alias: &Alias, + alias: &alias::Connector, _: &ConnectorConfig, raw: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/gpubsub/producer.rs b/src/connectors/impls/gpubsub/producer.rs index cce134f60f..226c2ce5a1 100644 --- a/src/connectors/impls/gpubsub/producer.rs +++ b/src/connectors/impls/gpubsub/producer.rs @@ -13,15 +13,7 @@ // limitations under the License. use crate::connectors::google::{AuthInterceptor, TokenProvider, TokenSrc}; -use crate::connectors::prelude::{ - Alias, Attempt, ErrorKind, EventSerializer, KillSwitch, SinkAddr, SinkContext, - SinkManagerBuilder, SinkReply, Url, -}; -use crate::connectors::sink::Sink; -use crate::connectors::{ - CodecReq, Connector, ConnectorBuilder, ConnectorConfig, ConnectorContext, ConnectorType, - Context, -}; +use crate::connectors::prelude::*; use crate::errors::Result; use googapis::google::pubsub::v1::publisher_client::PublisherClient; use googapis::google::pubsub::v1::{PublishRequest, PubsubMessage}; @@ -33,7 +25,7 @@ use tonic::codegen::InterceptedService; use tonic::transport::{Certificate, Channel, ClientTlsConfig}; use tonic::Code; use tremor_common::url::HttpsDefaults; -use tremor_pipeline::{ConfigImpl, Event}; +use tremor_pipeline::Event; use tremor_value::Value; use value_trait::ValueAccess; @@ -50,7 +42,7 @@ pub(crate) struct Config { pub topic: String, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} #[derive(Default, Debug)] pub(crate) struct Builder {} @@ -70,7 +62,7 @@ impl ConnectorBuilder for Builder { async fn build_cfg( &self, - _alias: 
&Alias, + _alias: &alias::Connector, _config: &ConnectorConfig, raw_config: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/http/auth.rs b/src/connectors/impls/http/auth.rs index 878a71e78c..9b6052deb2 100644 --- a/src/connectors/impls/http/auth.rs +++ b/src/connectors/impls/http/auth.rs @@ -13,9 +13,8 @@ // limitations under the License. use crate::errors::Result; -use base64::Engine; use std::io::Write; -use tremor_common::base64::BASE64; +use tremor_common::base64::{Engine, BASE64}; /// Authorization methods #[derive(Serialize, Deserialize, Clone, Debug, PartialEq)] diff --git a/src/connectors/impls/http/client.rs b/src/connectors/impls/http/client.rs index e52067ab7d..d128cd66df 100644 --- a/src/connectors/impls/http/client.rs +++ b/src/connectors/impls/http/client.rs @@ -15,13 +15,13 @@ use super::auth::Auth; use super::meta::{extract_request_meta, extract_response_meta, HttpRequestBuilder}; use super::utils::{Header, RequestId}; +use crate::connectors::sink::concurrency_cap::ConcurrencyCap; use crate::connectors::utils::mime::MimeCodecMap; use crate::connectors::utils::tls::TLSClientConfig; use crate::{ channel::{bounded, Receiver, Sender}, errors::empty_error, }; -use crate::{config::NameWithConfig, connectors::sink::concurrency_cap::ConcurrencyCap}; use crate::{connectors::prelude::*, errors::err_connector_def}; use either::Either; use halfbrown::HashMap; @@ -36,6 +36,7 @@ use std::sync::Arc; use std::{sync::atomic::AtomicBool, time::Duration}; use tokio::time::timeout; use tremor_common::time::nanotime; +use tremor_config::NameWithConfig; // pipeline -> Sink -> http client // | @@ -97,7 +98,7 @@ fn default_method() -> SerdeishMethod { } // for new -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} #[derive(Debug, Default)] pub(crate) struct Builder {} @@ -110,7 +111,7 @@ impl ConnectorBuilder for Builder { async fn build_cfg( &self, - id: &Alias, + id: &alias::Connector, _connector_config: &ConnectorConfig, config: 
&Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/http/meta.rs b/src/connectors/impls/http/meta.rs index d48273ba60..b3ad2bbd02 100644 --- a/src/connectors/impls/http/meta.rs +++ b/src/connectors/impls/http/meta.rs @@ -16,10 +16,7 @@ use std::convert::Infallible; use super::{client, utils::RequestId}; use crate::channel::{bounded, Sender}; -use crate::{ - config::NameWithConfig, - connectors::{prelude::*, utils::mime::MimeCodecMap}, -}; +use crate::connectors::{prelude::*, utils::mime::MimeCodecMap}; use either::Either; use http::{ header::{self, HeaderName}, @@ -319,7 +316,7 @@ mod test { CodecReq::Optional("json"), vec![], &ConnectorType("http".into()), - &Alias::new("flow", "http"), + &alias::Connector::new("flow", "http"), )?; let config = client::Config::new(&c)?; diff --git a/src/connectors/impls/http/server.rs b/src/connectors/impls/http/server.rs index f81d31982f..71f4bdf4e1 100644 --- a/src/connectors/impls/http/server.rs +++ b/src/connectors/impls/http/server.rs @@ -22,10 +22,8 @@ use crate::{ errors::empty_error, }; use crate::{ - config::NameWithConfig, connectors::{ prelude::*, - spawn_task, utils::{mime::MimeCodecMap, tls::TLSServerConfig}, }, errors::err_connector_def, @@ -63,7 +61,7 @@ pub(crate) struct Config { mime_mapping: Option>, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} #[derive(Debug, Default)] pub(crate) struct Builder {} @@ -81,7 +79,7 @@ impl ConnectorBuilder for Builder { async fn build_cfg( &self, - id: &Alias, + id: &alias::Connector, _raw_config: &ConnectorConfig, config: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/kafka.rs b/src/connectors/impls/kafka.rs index 4c19b1c524..03551fdb5c 100644 --- a/src/connectors/impls/kafka.rs +++ b/src/connectors/impls/kafka.rs @@ -308,7 +308,7 @@ use tremor_value::Value; const KAFKA_CONNECT_TIMEOUT: Duration = Duration::from_secs(1); /// verify broker host:port pairs in kafka connector configs -fn verify_brokers(alias: &Alias, 
brokers: &[String]) -> Result<(String, Option)> { +fn verify_brokers(alias: &alias::Connector, brokers: &[String]) -> Result<(String, Option)> { let mut first_broker: Option<(String, Option)> = None; for broker in brokers { match broker.split(':').collect::>().as_slice() { diff --git a/src/connectors/impls/kafka/consumer.rs b/src/connectors/impls/kafka/consumer.rs index 08fe7ecc1d..e8f9a589ba 100644 --- a/src/connectors/impls/kafka/consumer.rs +++ b/src/connectors/impls/kafka/consumer.rs @@ -245,7 +245,7 @@ pub(crate) struct Config { test_options: HashMap, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} fn default_commit_interval() -> u64 { 5_000_000_000 // 5 seconds, the default from librdkafka @@ -262,7 +262,7 @@ impl ConnectorBuilder for Builder { async fn build_cfg( &self, - alias: &Alias, + alias: &alias::Connector, config: &ConnectorConfig, raw_config: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/kafka/producer.rs b/src/connectors/impls/kafka/producer.rs index 5dee612de7..425432778d 100644 --- a/src/connectors/impls/kafka/producer.rs +++ b/src/connectors/impls/kafka/producer.rs @@ -66,7 +66,7 @@ pub(crate) struct Config { rdkafka_options: Option>, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} #[derive(Default, Debug)] pub(crate) struct Builder {} @@ -79,7 +79,7 @@ impl ConnectorBuilder for Builder { async fn build_cfg( &self, - alias: &Alias, + alias: &alias::Connector, config: &ConnectorConfig, raw_config: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/kv.rs b/src/connectors/impls/kv.rs index fbb6eb57ec..3b038e671c 100644 --- a/src/connectors/impls/kv.rs +++ b/src/connectors/impls/kv.rs @@ -270,7 +270,7 @@ pub(crate) struct Config { path: String, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} #[derive(Debug, Default)] pub(crate) struct Builder {} @@ -285,7 +285,7 @@ impl ConnectorBuilder for Builder { } async fn build_cfg( &self, - id: 
&Alias, + id: &alias::Connector, _: &ConnectorConfig, config: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/metrics.rs b/src/connectors/impls/metrics.rs index 6d9d38b671..c6c6d78e87 100644 --- a/src/connectors/impls/metrics.rs +++ b/src/connectors/impls/metrics.rs @@ -192,7 +192,7 @@ impl ConnectorBuilder for Builder { } async fn build( &self, - _id: &Alias, + _id: &alias::Connector, _config: &ConnectorConfig, _kill_switch: &KillSwitch, ) -> Result> { diff --git a/src/connectors/impls/metronome.rs b/src/connectors/impls/metronome.rs index 1485a451d9..70d8b6155f 100644 --- a/src/connectors/impls/metronome.rs +++ b/src/connectors/impls/metronome.rs @@ -111,7 +111,7 @@ pub(crate) struct Config { pub interval: u64, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} #[derive(Debug, Default)] pub(crate) struct Builder {} @@ -124,7 +124,7 @@ impl ConnectorBuilder for Builder { async fn build_cfg( &self, - _: &Alias, + _: &alias::Connector, _: &ConnectorConfig, raw: &Value, _kill_switch: &KillSwitch, @@ -228,7 +228,7 @@ mod tests { #[tokio::test(flavor = "multi_thread")] async fn missing_config() -> Result<()> { - let alias = Alias::new("flow", "connector"); + let alias = alias::Connector::new("flow", "connector"); let builder = super::Builder::default(); let connector_config = super::ConnectorConfig { connector_type: builder.connector_type(), diff --git a/src/connectors/impls/null.rs b/src/connectors/impls/null.rs index c3ab701266..8ff56da4c9 100644 --- a/src/connectors/impls/null.rs +++ b/src/connectors/impls/null.rs @@ -98,7 +98,7 @@ impl ConnectorBuilder for Builder { async fn build( &self, - _alias: &Alias, + _alias: &alias::Connector, _config: &ConnectorConfig, _kill_switch: &KillSwitch, ) -> Result> { diff --git a/src/connectors/impls/otel/client.rs b/src/connectors/impls/otel/client.rs index b0f52cf8c0..1039adb60e 100644 --- a/src/connectors/impls/otel/client.rs +++ b/src/connectors/impls/otel/client.rs @@ -42,7 +42,7 @@ 
pub(crate) struct Config { pub(crate) metrics: bool, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} /// The `OpenTelemetry` client connector pub(crate) struct Client { @@ -68,7 +68,7 @@ impl ConnectorBuilder for Builder { async fn build_cfg( &self, - _id: &Alias, + _id: &alias::Connector, _: &ConnectorConfig, config: &Value, _kill_switch: &KillSwitch, @@ -212,7 +212,7 @@ mod tests { #[tokio::test(flavor = "multi_thread")] async fn otel_client_builder() -> Result<()> { - let alias = Alias::new("flow", "my_otel_client"); + let alias = alias::Connector::new("flow", "my_otel_client"); let with_processors = literal!({ "config": { "url": "localhost:4317", diff --git a/src/connectors/impls/otel/server.rs b/src/connectors/impls/otel/server.rs index 2bc7209882..4732ba7770 100644 --- a/src/connectors/impls/otel/server.rs +++ b/src/connectors/impls/otel/server.rs @@ -37,7 +37,7 @@ pub(crate) struct Config { pub(crate) metrics: bool, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} /// The `OpenTelemetry` client connector pub(crate) struct Server { @@ -68,7 +68,7 @@ impl ConnectorBuilder for Builder { async fn build_cfg( &self, - id: &Alias, + id: &alias::Connector, _: &ConnectorConfig, config: &Value, _kill_switch: &KillSwitch, @@ -200,7 +200,7 @@ mod tests { #[tokio::test(flavor = "multi_thread")] async fn otel_client_builder() -> Result<()> { - let alias = Alias::new("test", "my_otel_server"); + let alias = alias::Connector::new("test", "my_otel_server"); let with_processors = literal!({ "config": { "url": "localhost:4317", @@ -211,7 +211,7 @@ mod tests { ConnectorType("otel_server".into()), &with_processors, )?; - let alias = Alias::new("flow", "my_otel_server"); + let alias = alias::Connector::new("flow", "my_otel_server"); let builder = super::Builder::default(); let kill_switch = KillSwitch::dummy(); diff --git a/src/connectors/impls/s3/reader.rs b/src/connectors/impls/s3/reader.rs index e3ea111c7b..42d9038fcb 100644 --- 
a/src/connectors/impls/s3/reader.rs +++ b/src/connectors/impls/s3/reader.rs @@ -82,7 +82,7 @@ impl Config { } } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} #[derive(Debug, Default)] pub(crate) struct Builder {} @@ -95,7 +95,7 @@ impl ConnectorBuilder for Builder { async fn build_cfg( &self, - _: &Alias, + _: &alias::Connector, _: &ConnectorConfig, config: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/s3/streamer.rs b/src/connectors/impls/s3/streamer.rs index 33f93f0dd4..2739b26370 100644 --- a/src/connectors/impls/s3/streamer.rs +++ b/src/connectors/impls/s3/streamer.rs @@ -66,7 +66,7 @@ impl Config { MORE_THEN_FIVEMBS } - fn normalize(&mut self, alias: &Alias) { + fn normalize(&mut self, alias: &alias::Connector) { if self.buffer_size < MORE_THEN_FIVEMBS { warn!("[Connector::{alias}] Setting `buffer_size` up to minimum of 5MB."); self.buffer_size = MORE_THEN_FIVEMBS; @@ -74,7 +74,7 @@ impl Config { } } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} #[derive(Debug, Default)] pub(crate) struct Builder {} @@ -87,7 +87,7 @@ impl ConnectorBuilder for Builder { async fn build_cfg( &self, - id: &Alias, + id: &alias::Connector, _: &ConnectorConfig, config: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/stdio.rs b/src/connectors/impls/stdio.rs index 9696e387ac..78753baa82 100644 --- a/src/connectors/impls/stdio.rs +++ b/src/connectors/impls/stdio.rs @@ -96,7 +96,7 @@ impl ConnectorBuilder for Builder { } async fn build( &self, - _id: &Alias, + _id: &alias::Connector, _raw_config: &ConnectorConfig, _kill_switch: &KillSwitch, ) -> Result> { diff --git a/src/connectors/impls/tcp.rs b/src/connectors/impls/tcp.rs index f5a06d6c1b..d5100b3e19 100644 --- a/src/connectors/impls/tcp.rs +++ b/src/connectors/impls/tcp.rs @@ -137,7 +137,7 @@ where { wrapped_stream: S, buffer: Vec, - alias: Alias, + alias: alias::Connector, origin_uri: EventOriginUri, meta: Value<'static>, // notify the 
writer when the connection is done, @@ -149,7 +149,7 @@ impl TcpReader> { fn new( wrapped_stream: ReadHalf, buffer: Vec, - alias: Alias, + alias: alias::Connector, origin_uri: EventOriginUri, meta: Value<'static>, sink_runtime: Option>, @@ -169,7 +169,7 @@ impl TcpReader>> { fn tls_server( stream: ReadHalf>, buffer: Vec, - alias: Alias, + alias: alias::Connector, origin_uri: EventOriginUri, meta: Value<'static>, sink_runtime: Option>, @@ -189,7 +189,7 @@ impl TcpReader>> { fn tls_client( stream: ReadHalf>, buffer: Vec, - alias: Alias, + alias: alias::Connector, origin_uri: EventOriginUri, meta: Value<'static>, ) -> Self { diff --git a/src/connectors/impls/tcp/client.rs b/src/connectors/impls/tcp/client.rs index 36cd3160eb..7eaf5a3694 100644 --- a/src/connectors/impls/tcp/client.rs +++ b/src/connectors/impls/tcp/client.rs @@ -54,7 +54,7 @@ pub(crate) struct Config { socket_options: TcpSocketOptions, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} pub(crate) struct TcpClient { config: Config, @@ -78,7 +78,7 @@ impl ConnectorBuilder for Builder { } async fn build_cfg( &self, - id: &Alias, + id: &alias::Connector, _: &ConnectorConfig, config: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/tcp/server.rs b/src/connectors/impls/tcp/server.rs index d5c22e08b1..dcac552543 100644 --- a/src/connectors/impls/tcp/server.rs +++ b/src/connectors/impls/tcp/server.rs @@ -53,7 +53,7 @@ pub(crate) struct Config { socket_options: TcpSocketOptions, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} #[allow(clippy::module_name_repetitions)] pub(crate) struct TcpServer { @@ -75,7 +75,7 @@ impl ConnectorBuilder for Builder { } async fn build_cfg( &self, - id: &Alias, + id: &alias::Connector, _: &ConnectorConfig, config: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/udp/client.rs b/src/connectors/impls/udp/client.rs index e7595583cb..e3a86dd6db 100644 --- a/src/connectors/impls/udp/client.rs +++ 
b/src/connectors/impls/udp/client.rs @@ -32,7 +32,7 @@ pub(crate) struct Config { socket_options: UdpSocketOptions, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} struct UdpClient { config: Config, @@ -48,7 +48,7 @@ impl ConnectorBuilder for Builder { } async fn build_cfg( &self, - _id: &Alias, + _id: &alias::Connector, _: &ConnectorConfig, config: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/udp/server.rs b/src/connectors/impls/udp/server.rs index 142ac8afbf..e58bbfae7e 100644 --- a/src/connectors/impls/udp/server.rs +++ b/src/connectors/impls/udp/server.rs @@ -35,7 +35,7 @@ pub(crate) struct Config { socket_options: UdpSocketOptions, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} struct UdpServer { config: Config, @@ -51,7 +51,7 @@ impl ConnectorBuilder for Builder { } async fn build_cfg( &self, - _: &Alias, + _: &alias::Connector, _: &ConnectorConfig, raw: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/unix_socket/client.rs b/src/connectors/impls/unix_socket/client.rs index 90987f2c56..f9ebc4eb07 100644 --- a/src/connectors/impls/unix_socket/client.rs +++ b/src/connectors/impls/unix_socket/client.rs @@ -34,7 +34,7 @@ pub(crate) struct Config { buf_size: usize, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} #[derive(Debug, Default)] pub(crate) struct Builder {} @@ -46,7 +46,7 @@ impl ConnectorBuilder for Builder { } async fn build_cfg( &self, - _: &Alias, + _: &alias::Connector, _: &ConnectorConfig, conf: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/unix_socket/server.rs b/src/connectors/impls/unix_socket/server.rs index b897bd4d4c..c37150e503 100644 --- a/src/connectors/impls/unix_socket/server.rs +++ b/src/connectors/impls/unix_socket/server.rs @@ -51,7 +51,7 @@ pub(crate) struct Config { buf_size: usize, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} //struct ConnectionMeta {} @@ -65,7 
+65,7 @@ impl ConnectorBuilder for Builder { async fn build_cfg( &self, - _: &Alias, + _: &alias::Connector, _: &ConnectorConfig, config: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/wal.rs b/src/connectors/impls/wal.rs index ebf54a03bb..52d6270925 100644 --- a/src/connectors/impls/wal.rs +++ b/src/connectors/impls/wal.rs @@ -52,7 +52,7 @@ pub(crate) struct Config { max_chunks: usize, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} struct Wal { event_origin_uri: EventOriginUri, @@ -69,7 +69,7 @@ impl ConnectorBuilder for Builder { } async fn build_cfg( &self, - _: &Alias, + _: &alias::Connector, _: &ConnectorConfig, config: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/ws/client.rs b/src/connectors/impls/ws/client.rs index 27b99f0f38..adad81f916 100644 --- a/src/connectors/impls/ws/client.rs +++ b/src/connectors/impls/ws/client.rs @@ -47,7 +47,7 @@ pub(crate) struct Config { tls: Option>, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} #[derive(Debug, Default)] pub(crate) struct Builder {} @@ -64,7 +64,7 @@ impl ConnectorBuilder for Builder { } async fn build_cfg( &self, - id: &Alias, + id: &alias::Connector, _: &ConnectorConfig, config: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/impls/ws/server.rs b/src/connectors/impls/ws/server.rs index ee9795cd76..80ca86dac9 100644 --- a/src/connectors/impls/ws/server.rs +++ b/src/connectors/impls/ws/server.rs @@ -47,7 +47,7 @@ pub(crate) struct Config { tls: Option, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} #[allow(clippy::module_name_repetitions)] pub(crate) struct WsServer { @@ -70,7 +70,7 @@ impl ConnectorBuilder for Builder { } async fn build_cfg( &self, - _id: &Alias, + _id: &alias::Connector, _: &ConnectorConfig, raw_config: &Value, _kill_switch: &KillSwitch, diff --git a/src/connectors/prelude.rs b/src/connectors/prelude.rs index ff975ebc5f..1ffa9a62ae 100644 --- 
a/src/connectors/prelude.rs +++ b/src/connectors/prelude.rs @@ -27,7 +27,7 @@ pub(crate) use crate::{ }, spawn_task, utils::reconnect::Attempt, - Alias, CodecReq, Connector, ConnectorBuilder, ConnectorContext, ConnectorType, Context, + CodecReq, Connector, ConnectorBuilder, ConnectorContext, ConnectorType, Context, StreamDone, StreamIdGen, ACCEPT_TIMEOUT, }, errors::{err_connector_def, Error, Kind as ErrorKind, Result}, @@ -36,15 +36,15 @@ pub(crate) use crate::{ utils::hostname, Event, }; - pub(crate) use std::sync::atomic::Ordering; +pub use tremor_common::alias; pub(crate) use tremor_common::{ ports::{Port, ERR, IN, OUT}, url::{Defaults, HttpsDefaults, Url}, }; -pub use tremor_pipeline::{ - CbAction, ConfigImpl, EventIdGenerator, EventOriginUri, DEFAULT_STREAM_ID, -}; +pub(crate) use tremor_config::Impl; +pub use tremor_config::NameWithConfig; +pub use tremor_pipeline::{CbAction, EventIdGenerator, EventOriginUri, DEFAULT_STREAM_ID}; pub(crate) use tremor_script::prelude::*; /// default buf size used for reading from files and streams (sockets etc) /// @@ -68,9 +68,4 @@ pub(crate) fn default_backlog() -> i32 { /// Encapsulates connector configuration pub(crate) use crate::connectors::ConnectorConfig; -pub(crate) fn default_true() -> bool { - true -} -pub(crate) fn default_false() -> bool { - false -} +pub(crate) use tremor_common::{default_false, default_true}; diff --git a/src/connectors/sink.rs b/src/connectors/sink.rs index be798d7b30..95ac4cb8d4 100644 --- a/src/connectors/sink.rs +++ b/src/connectors/sink.rs @@ -21,20 +21,9 @@ pub(crate) mod concurrency_cap; use crate::{ channel::{bounded, unbounded, Receiver, Sender, UnboundedReceiver, UnboundedSender}, - config::{ - Codec as CodecConfig, Connector as ConnectorConfig, NameWithConfig, - Postprocessor as PostprocessorConfig, - }, - connectors::{ - utils::{ - metrics::SinkReporter, - reconnect::{Attempt, ConnectionLostNotifier}, - }, - Alias, CodecReq, ConnectorType, Context, Msg, QuiescenceBeacon, StreamDone, 
- }, - errors::Result, + config::Connector as ConnectorConfig, + connectors::prelude::*, pipeline, - postprocessor::{finish, make_postprocessors, postprocess, Postprocessors}, primerge::PriorityMerge, qsize, }; @@ -53,10 +42,15 @@ use tremor_common::{ ports::Port, time::nanotime, }; +use tremor_interceptor::postprocessor::{ + self, finish, make_postprocessors, postprocess, Postprocessors, +}; use tremor_pipeline::{CbAction, Event, EventId, OpMeta, SignalKind, DEFAULT_STREAM_ID}; use tremor_script::{ast::DeployEndpoint, EventPayload}; use tremor_value::Value; +use super::{metrics::SinkReporter, ConnectionLostNotifier, Msg, QuiescenceBeacon}; + pub(crate) type ReplySender = UnboundedSender; /// Result for a sink function that may provide insights or response. @@ -263,7 +257,7 @@ pub(crate) struct SinkContextInner { /// the connector unique identifier pub(crate) uid: SinkId, /// the connector alias - pub(crate) alias: Alias, + pub(crate) alias: alias::Connector, /// the connector type pub(crate) connector_type: ConnectorType, @@ -284,7 +278,7 @@ impl SinkContext { } pub(crate) fn new( uid: SinkId, - alias: Alias, + alias: alias::Connector, connector_type: ConnectorType, quiescence_beacon: QuiescenceBeacon, notifier: ConnectionLostNotifier, @@ -309,7 +303,7 @@ impl Context for SinkContext { // fn uid(&self) -> &SinkId { // &self.0.uid // } - fn alias(&self) -> &Alias { + fn alias(&self) -> &alias::Connector { &self.0.alias } @@ -418,7 +412,7 @@ impl SinkManagerBuilder { pub(crate) fn builder( config: &ConnectorConfig, connector_codec_requirement: CodecReq, - alias: &Alias, + alias: &alias::Connector, metrics_reporter: SinkReporter, ) -> Result { @@ -452,8 +446,8 @@ pub(crate) struct EventSerializer { pub(crate) codec: Box, postprocessors: Postprocessors, // creation templates for stream handling - codec_config: CodecConfig, - postprocessor_configs: Vec, + codec_config: tremor_codec::Config, + postprocessor_configs: Vec, // stream data // TODO: clear out state from 
codec, postprocessors and enable reuse streams: BTreeMap, Postprocessors)>, @@ -461,11 +455,11 @@ pub(crate) struct EventSerializer { impl EventSerializer { pub(crate) fn new( - codec_config: Option, + codec_config: Option, default_codec: CodecReq, - postprocessor_configs: Vec, + postprocessor_configs: Vec, connector_type: &ConnectorType, - alias: &Alias, + alias: &alias::Connector, ) -> Result { let codec_config = match default_codec { CodecReq::Structured => { @@ -475,11 +469,13 @@ impl EventSerializer { ) .into()); } - CodecConfig::from("null") + tremor_codec::Config::from("null") } CodecReq::Required => codec_config .ok_or_else(|| format!("Missing codec for {connector_type} connector {alias}"))?, - CodecReq::Optional(opt) => codec_config.unwrap_or_else(|| CodecConfig::from(opt)), + CodecReq::Optional(opt) => { + codec_config.unwrap_or_else(|| tremor_codec::Config::from(opt)) + } }; let codec = codec::resolve(&codec_config)?; @@ -552,22 +548,22 @@ impl EventSerializer { ) -> Result>> { if stream_id == DEFAULT_STREAM_ID { // no codec_overwrite for the default stream - postprocess( + Ok(postprocess( &mut self.postprocessors, ingest_ns, self.codec.encode(value, meta).await?, &self.alias, - ) + )?) } else { match self.streams.entry(stream_id) { Entry::Occupied(mut entry) => { let (codec, pps) = entry.get_mut(); - postprocess( + Ok(postprocess( pps, ingest_ns, codec.encode(value, meta).await?, &self.alias, - ) + )?) } Entry::Vacant(entry) => { // codec overwrite only considered for new streams @@ -578,7 +574,12 @@ impl EventSerializer { let pps = make_postprocessors(self.postprocessor_configs.as_slice())?; // insert data for a new stream let (c, pps2) = entry.insert((codec, pps)); - postprocess(pps2, ingest_ns, c.encode(value, meta).await?, &self.alias) + Ok(postprocess( + pps2, + ingest_ns, + c.encode(value, meta).await?, + &self.alias, + )?) 
} } } @@ -587,7 +588,7 @@ impl EventSerializer { /// remove and flush out any pending data from the stream identified by the given `stream_id` pub(crate) fn finish_stream(&mut self, stream_id: u64) -> Result>> { if let Some((mut _codec, mut postprocessors)) = self.streams.remove(&stream_id) { - finish(&mut postprocessors, &self.alias) + Ok(finish(&mut postprocessors, &self.alias)?) } else { Ok(vec![]) } diff --git a/src/connectors/source.rs b/src/connectors/source.rs index 0f07c04941..d29ce91ddf 100644 --- a/src/connectors/source.rs +++ b/src/connectors/source.rs @@ -21,19 +21,12 @@ use crate::channel::{unbounded, Sender, UnboundedReceiver, UnboundedSender}; use crate::connectors::{ metrics::SourceReporter, utils::reconnect::{Attempt, ConnectionLostNotifier}, - Alias, ConnectorType, Context, Msg, QuiescenceBeacon, StreamDone, + ConnectorType, Context, Msg, QuiescenceBeacon, StreamDone, }; +use crate::errors::empty_error; use crate::errors::{Error, Result}; use crate::pipeline; use crate::pipeline::InputTarget; -use crate::preprocessor::{finish, make_preprocessors, preprocess, Preprocessors}; -use crate::{ - config::{ - self, Codec as CodecConfig, Connector as ConnectorConfig, NameWithConfig, - Preprocessor as PreprocessorConfig, - }, - errors::empty_error, -}; pub(crate) use channel_source::{ChannelSource, ChannelSourceRuntime}; use hashbrown::HashSet; use simd_json::Mutable; @@ -42,10 +35,14 @@ use std::fmt::Display; use tokio::task; use tremor_codec::{self as codec, Codec}; use tremor_common::{ + alias, ids::{Id, SinkId, SourceId}, ports::{Port, ERR, OUT}, time::nanotime, }; +use tremor_config::NameWithConfig; +use tremor_interceptor::preprocessor; +use tremor_interceptor::preprocessor::{finish, make_preprocessors, preprocess, Preprocessors}; use tremor_pipeline::{ CbAction, Event, EventId, EventIdGenerator, EventOriginUri, DEFAULT_STREAM_ID, }; @@ -281,7 +278,7 @@ pub(crate) struct SourceContext { /// connector uid pub uid: SourceId, /// connector alias - 
pub(crate) alias: Alias, + pub(crate) alias: alias::Connector, /// connector type pub(crate) connector_type: ConnectorType, @@ -299,7 +296,7 @@ impl Display for SourceContext { } impl Context for SourceContext { - fn alias(&self) -> &Alias { + fn alias(&self) -> &alias::Connector { &self.alias } @@ -387,7 +384,7 @@ impl SourceManagerBuilder { /// - on invalid connector configuration pub(crate) fn builder( source_uid: SourceId, - config: &ConnectorConfig, + config: &super::ConnectorConfig, connector_default_codec: CodecReq, source_metrics_reporter: SourceReporter, ) -> Result { @@ -401,7 +398,7 @@ pub(crate) fn builder( ) .into()); } - CodecConfig::from("null") + tremor_codec::Config::from("null") } CodecReq::Required => config .codec @@ -410,7 +407,7 @@ pub(crate) fn builder( CodecReq::Optional(opt) => config .codec .clone() - .unwrap_or_else(|| CodecConfig::from(opt)), + .unwrap_or_else(|| tremor_codec::Config::from(opt)), }; let streams = Streams::new(source_uid, codec_config, preprocessor_configs); @@ -424,8 +421,8 @@ pub(crate) fn builder( // TODO: there is optimization potential here for reusing codec and preprocessors after a stream got ended struct Streams { uid: SourceId, - codec_config: CodecConfig, - preprocessor_configs: Vec, + codec_config: tremor_codec::Config, + preprocessor_configs: Vec, states: BTreeMap, } @@ -436,8 +433,8 @@ impl Streams { /// constructor fn new( uid: SourceId, - codec_config: config::Codec, - preprocessor_configs: Vec, + codec_config: tremor_codec::Config, + preprocessor_configs: Vec, ) -> Self { let states = BTreeMap::new(); // We used to initialize the default stream here, @@ -494,9 +491,9 @@ impl Streams { fn build_stream( source_uid: SourceId, stream_id: u64, - codec_config: &CodecConfig, + codec_config: &tremor_codec::Config, codec_overwrite: Option, - preprocessor_configs: &[PreprocessorConfig], + preprocessor_configs: &[preprocessor::Config], ) -> Result { let codec = if let Some(codec_overwrite) = codec_overwrite { 
codec::resolve(&codec_overwrite)? @@ -1243,7 +1240,7 @@ where /// preprocessor or codec errors are turned into events to the ERR port of the source/connector #[allow(clippy::too_many_arguments)] async fn build_events( - alias: &Alias, + alias: &alias::Connector, stream_state: &mut StreamState, ingest_ns: &mut u64, pull_id: u64, @@ -1300,7 +1297,13 @@ async fn build_events( } Err(e) => { // preprocessor error - let err_payload = make_error(alias, &e, stream_state.stream_id, pull_id, meta); + let err_payload = make_error( + alias, + &Error::from(e), + stream_state.stream_id, + pull_id, + meta, + ); let event = build_event( stream_state, pull_id, @@ -1318,7 +1321,7 @@ async fn build_events( /// preprocessor or codec errors are turned into events to the ERR port of the source/connector #[allow(clippy::too_many_arguments)] async fn build_last_events( - alias: &Alias, + alias: &alias::Connector, stream_state: &mut StreamState, ingest_ns: &mut u64, pull_id: u64, @@ -1366,7 +1369,13 @@ async fn build_last_events( } Err(e) => { // preprocessor error - let err_payload = make_error(alias, &e, stream_state.stream_id, pull_id, meta); + let err_payload = make_error( + alias, + &Error::from(e), + stream_state.stream_id, + pull_id, + meta, + ); let event = build_event( stream_state, pull_id, @@ -1382,7 +1391,7 @@ async fn build_last_events( /// create an error payload fn make_error( - connector_alias: &Alias, + connector_alias: &alias::Connector, error: &Error, stream_id: u64, pull_id: u64, diff --git a/src/connectors/tests.rs b/src/connectors/tests.rs index 17d151d2ce..860e9af060 100644 --- a/src/connectors/tests.rs +++ b/src/connectors/tests.rs @@ -63,20 +63,16 @@ use crate::{ }; use crate::{ config, - connectors::{ - self, builtin_connector_types, source::SourceMsg, Alias as ConnectorAlias, Connectivity, - StatusReport, - }, + connectors::{self, builtin_connector_types, source::SourceMsg, Connectivity, StatusReport}, errors::Result, instance::State, - pipeline, qsize, - 
system::flow::Alias as FlowAlias, - Event, + pipeline, qsize, Event, }; use log::{debug, info}; use std::time::Duration; use std::{collections::HashMap, time::Instant}; use tokio::{sync::oneshot, task, time::timeout}; +use tremor_common::alias::{self}; use tremor_common::{ ids::{ConnectorIdGen, Id, SourceId}, ports::{Port, ERR, IN, OUT}, @@ -99,7 +95,7 @@ impl ConnectorHarness { input_ports: Vec>, output_ports: Vec>, ) -> Result { - let alias = ConnectorAlias::new("test", alias); + let alias = alias::Connector::new("test", alias); let mut connector_id_gen = ConnectorIdGen::new(); let mut known_connectors = HashMap::new(); @@ -378,12 +374,12 @@ impl TestPipeline { self.addr.send_mgmt(pipeline::MgmtMsg::Stop).await } pub(crate) fn new(alias: String) -> Self { - let flow_id = FlowAlias::new("test"); + let flow_id = alias::Flow::new("test"); let qsize = qsize(); let (tx, rx) = bounded(qsize); let (tx_cf, rx_cf) = unbounded(); let (tx_mgmt, mut rx_mgmt) = bounded(qsize); - let pipeline_id = pipeline::Alias::new(flow_id, alias); + let pipeline_id = alias::Pipeline::new(flow_id, alias); let addr = pipeline::Addr::new(tx, tx_cf, tx_mgmt, pipeline_id); task::spawn(async move { diff --git a/src/connectors/utils/metrics.rs b/src/connectors/utils/metrics.rs index 5e03f38df2..381fea0a3b 100644 --- a/src/connectors/utils/metrics.rs +++ b/src/connectors/utils/metrics.rs @@ -14,14 +14,15 @@ use beef::Cow; use simd_json::ObjectHasher; -use tremor_common::ports::{Port, ERR, IN, OUT}; +use tremor_common::{ + alias, + ports::{Port, ERR, IN, OUT}, +}; use tremor_pipeline::metrics::{value, value_count}; use tremor_pipeline::MetricsSender; use tremor_script::EventPayload; use tremor_value::prelude::*; -use crate::connectors::Alias; - const FLOW: Cow<'static, str> = Cow::const_str("flow"); const CONNECTOR: Cow<'static, str> = Cow::const_str("connector"); const PORT: Cow<'static, str> = Cow::const_str("port"); @@ -29,7 +30,7 @@ const CONNECTOR_EVENTS: Cow<'static, str> = 
Cow::const_str("connector_events"); /// metrics reporter for connector sources pub(crate) struct SourceReporter { - alias: Alias, + alias: alias::Connector, metrics_out: u64, metrics_err: u64, tx: MetricsSender, @@ -38,7 +39,11 @@ pub(crate) struct SourceReporter { } impl SourceReporter { - pub(crate) fn new(alias: Alias, tx: MetricsSender, flush_interval_s: Option) -> Self { + pub(crate) fn new( + alias: alias::Connector, + tx: MetricsSender, + flush_interval_s: Option, + ) -> Self { Self { alias, metrics_out: 0, @@ -85,7 +90,7 @@ impl SourceReporter { /// metrics reporter for connector sinks pub(crate) struct SinkReporter { - alias: Alias, + alias: alias::Connector, metrics_in: u64, tx: MetricsSender, flush_interval_ns: Option, @@ -93,7 +98,11 @@ pub(crate) struct SinkReporter { } impl SinkReporter { - pub(crate) fn new(alias: Alias, tx: MetricsSender, flush_interval_s: Option) -> Self { + pub(crate) fn new( + alias: alias::Connector, + tx: MetricsSender, + flush_interval_s: Option, + ) -> Self { Self { alias, metrics_in: 0, @@ -128,7 +137,7 @@ impl SinkReporter { } } -pub(crate) fn send(tx: &MetricsSender, metric: EventPayload, alias: &Alias) { +pub(crate) fn send(tx: &MetricsSender, metric: EventPayload, alias: &alias::Connector) { use tremor_pipeline::MetricsMsg; if let Err(_e) = tx.send(MetricsMsg::new(metric, None)) { @@ -144,7 +153,7 @@ pub(crate) fn make_event_count_metrics_payload( timestamp: u64, port: Port<'static>, count: u64, - connector_id: &Alias, + connector_id: &alias::Connector, ) -> EventPayload { let mut tags = Object::with_capacity_and_hasher(2, ObjectHasher::default()); tags.insert_nocheck(FLOW, Value::from(connector_id.flow_alias().to_string())); diff --git a/src/connectors/utils/mime.rs b/src/connectors/utils/mime.rs index 2cdaf290bb..edd01f92e9 100644 --- a/src/connectors/utils/mime.rs +++ b/src/connectors/utils/mime.rs @@ -14,7 +14,7 @@ use halfbrown::HashMap; -use crate::config::NameWithConfig; +use tremor_config::NameWithConfig; const 
MIME_TYPES: [(&str, &str); 10] = [ ("application/json", "json"), diff --git a/src/connectors/utils/pb.rs b/src/connectors/utils/pb.rs index a208ad5f3b..7ae267c805 100644 --- a/src/connectors/utils/pb.rs +++ b/src/connectors/utils/pb.rs @@ -15,10 +15,9 @@ #![allow(dead_code)] use crate::errors::{Error, ErrorKind, Result}; -use base64::Engine; use simd_json::StaticNode; use std::collections::BTreeMap; -use tremor_common::base64::BASE64; +use tremor_common::base64::{Engine, BASE64}; use tremor_otelapis::opentelemetry::proto::metrics::v1; use tremor_value::Value; use value_trait::ValueAccess; diff --git a/src/connectors/utils/reconnect.rs b/src/connectors/utils/reconnect.rs index 16ede15a13..c381d48fb4 100644 --- a/src/connectors/utils/reconnect.rs +++ b/src/connectors/utils/reconnect.rs @@ -16,8 +16,8 @@ use crate::{ config::Reconnect, connectors::{ - sink::SinkMsg, source::SourceMsg, Addr, Alias, Connectivity, Connector, ConnectorContext, - Context, Msg, + sink::SinkMsg, source::SourceMsg, Addr, Connectivity, Connector, ConnectorContext, Context, + Msg, }, errors::{empty_error, Result}, }; @@ -28,6 +28,7 @@ use tokio::{ sync::mpsc::{channel as bounded, Sender}, task::{self, JoinHandle}, }; +use tremor_common::alias; #[derive(Debug, PartialEq, Clone)] enum ShouldRetry { @@ -190,7 +191,7 @@ pub(crate) struct ReconnectRuntime { addr: Addr, notifier: ConnectionLostNotifier, retry_task: Option>, - alias: Alias, + alias: alias::Connector, } /// Notifier that connector implementations @@ -231,7 +232,7 @@ impl ReconnectRuntime { } fn inner( addr: Addr, - alias: Alias, + alias: alias::Connector, notifier: ConnectionLostNotifier, config: &Reconnect, ) -> Self { @@ -446,7 +447,7 @@ mod tests { async fn failfast_runtime() -> Result<()> { let (tx, _rx) = bounded(qsize()); let notifier = ConnectionLostNotifier::new(tx.clone()); - let alias = Alias::new("flow", "test"); + let alias = alias::Connector::new("flow", "test"); let addr = Addr { alias: alias.clone(), source: None, @@ 
-478,7 +479,7 @@ mod tests { async fn backoff_runtime() -> Result<()> { let (tx, mut rx) = bounded(qsize()); let notifier = ConnectionLostNotifier::new(tx.clone()); - let alias = Alias::new("flow", "test"); + let alias = alias::Connector::new("flow", "test"); let addr = Addr { alias: alias.clone(), source: None, diff --git a/src/errors.rs b/src/errors.rs index 13913eeb8b..e05f58147e 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -151,64 +151,65 @@ error_chain! { Script(tremor_script::errors::Error, tremor_script::errors::ErrorKind); Pipeline(tremor_pipeline::errors::Error, tremor_pipeline::errors::ErrorKind); Codec(tremor_codec::errors::Error, tremor_codec::errors::ErrorKind); + Interceptor(tremor_interceptor::errors::Error, tremor_interceptor::errors::ErrorKind); } foreign_links { AddrParseError(std::net::AddrParseError); AnyhowError(anyhow::Error); AsyncChannelRecvError(async_std::channel::RecvError); AsyncChannelTryRecvError(async_std::channel::TryRecvError); - Base64Error(base64::DecodeError); + Base64Error(tremor_common::base64::DecodeError); ChannelReceiveError(std::sync::mpsc::RecvError); Clickhouse(clickhouse_rs::errors::Error); Common(tremor_common::Error); + Config(tremor_config::Error); CronError(cron::error::Error); DnsError(trust_dns_resolver::error::ResolveError); - InvalidTLSClientName(rustls::client::InvalidDnsNameError); ElasticError(elasticsearch::Error); ElasticTransportBuildError(elasticsearch::http::transport::BuildError); + EnvVarError(std::env::VarError); FromUtf8Error(std::string::FromUtf8Error); GoogleAuthError(gouth::Error); GrokError(grok::Error); + HeaderToStringError(http::header::ToStrError); Hex(hex::FromHexError); + Http(http::Error); HttpHeaderError(http::header::InvalidHeaderValue); + Hyper(hyper::Error); + InvalidHeaderName(reqwest::header::InvalidHeaderName); + InvalidMetadataValue(tonic::metadata::errors::InvalidMetadataValue); + InvalidMethod(http::method::InvalidMethod); + InvalidStatusCode(http::status::InvalidStatusCode); 
+ InvalidTLSClientName(rustls::client::InvalidDnsNameError); Io(std::io::Error); + JoinError(tokio::task::JoinError); JsonAccessError(value_trait::AccessError); JsonError(simd_json::Error); KafkaError(rdkafka::error::KafkaError); + MimeParsingError(mime::FromStrError); ModeParseError(file_mode::ModeParseError); - ParseIntError(std::num::ParseIntError); + OneShotRecv(tokio::sync::oneshot::error::RecvError); ParseFloatError(std::num::ParseFloatError); + ParseIntError(std::num::ParseIntError); RegexError(regex::Error); ReqwestError(reqwest::Error); - InvalidHeaderName(reqwest::header::InvalidHeaderName); RustlsError(rustls::Error); + S3ByteStream(aws_smithy_http::byte_stream::error::Error); + S3Endpoint(aws_smithy_http::endpoint::error::InvalidEndpointError); + Serenity(serenity::Error); Sled(sled::Error); - SnappyError(snap::Error); + Timeout(tokio::time::error::Elapsed); TonicStatusError(tonic::Status); TonicTransportError(tonic::transport::Error); - Ws(tokio_tungstenite::tungstenite::Error); TryFromIntError(std::num::TryFromIntError); - ValueError(tremor_value::Error); - UrlParserError(url::ParseError); UriParserError(http::uri::InvalidUri); + UrlParserError(url::ParseError); Utf8Error(std::str::Utf8Error); - EnvVarError(std::env::VarError); - YamlError(serde_yaml::Error) #[doc = "Error during yaml parsing"]; - WalJson(qwal::Error); + ValueError(tremor_value::Error); WalInfailable(qwal::Error); - Serenity(serenity::Error); - InvalidMetadataValue(tonic::metadata::errors::InvalidMetadataValue); - S3Endpoint(aws_smithy_http::endpoint::error::InvalidEndpointError); - S3ByteStream(aws_smithy_http::byte_stream::error::Error); - JoinError(tokio::task::JoinError); - Timeout(tokio::time::error::Elapsed); - OneShotRecv(tokio::sync::oneshot::error::RecvError); - InvalidMethod(http::method::InvalidMethod); - Http(http::Error); - Hyper(hyper::Error); - HeaderToStringError(http::header::ToStrError); - MimeParsingError(mime::FromStrError); - 
InvalidStatusCode(http::status::InvalidStatusCode); + WalJson(qwal::Error); + Ws(tokio_tungstenite::tungstenite::Error); + YamlError(serde_yaml::Error) #[doc = "Error during yaml parsing"]; } errors { @@ -242,11 +243,6 @@ error_chain! { display("Unknown namespace: {}", n) } - InvalidGelfHeader(len: usize, initial: Option<[u8; 2]>) { - description("Invalid GELF header") - display("Invalid GELF header len: {}, prefix: {:?}", len, initial) - } - BadUtF8InString { description("Bad UTF8 in input string") display("Bad UTF8 in input string") @@ -317,10 +313,7 @@ error_chain! { description("Connector not found") display("Connector \"{}\" not found in Flow \"{}\"", alias, flow_id) } - InvalidInputData(msg: &'static str) { - description("Invalid Input data") - display("Invalid Input data: {}", msg) - } + GbqSinkFailed(msg: &'static str) { description("GBQ Sink failed") display("GBQ Sink failed: {}", msg) diff --git a/src/lib.rs b/src/lib.rs index f00a4ae8f4..74049bd278 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -56,10 +56,6 @@ pub(crate) mod primerge; /// pipelines pub mod pipeline; -/// Onramp Preprocessors -pub mod postprocessor; -/// Offramp Postprocessors -pub mod preprocessor; /// Tremor connector extensions pub mod connectors; diff --git a/src/pipeline.rs b/src/pipeline.rs index 809d0567b6..a77f240c51 100644 --- a/src/pipeline.rs +++ b/src/pipeline.rs @@ -20,13 +20,12 @@ use crate::{ errors::{pipe_send_e, Result}, instance::State, primerge::PriorityMerge, - system::flow, }; use futures::StreamExt; use std::{fmt, time::Duration}; use tokio::task::{self, JoinHandle}; use tokio_stream::wrappers::{ReceiverStream, UnboundedReceiverStream}; -use tremor_common::{ids::OperatorIdGen, ports::Port, time::nanotime}; +use tremor_common::{alias, ids::OperatorIdGen, ports::Port, time::nanotime}; use tremor_pipeline::{ errors::ErrorKind as PipelineErrorKind, CbAction, Event, ExecutableGraph, SignalKind, }; @@ -37,41 +36,13 @@ type Inputs = halfbrown::HashMap; type Dests = 
halfbrown::HashMap, Vec<(DeployEndpoint, OutputTarget)>>; type EventSet = Vec<(Port<'static>, Event)>; -#[derive(Debug, PartialEq, PartialOrd, Eq, Hash, Clone, Serialize, Deserialize)] -pub(crate) struct Alias { - flow_alias: flow::Alias, - pipeline_alias: String, -} - -impl Alias { - pub(crate) fn new( - flow_alias: impl Into, - pipeline_alias: impl Into, - ) -> Self { - Self { - flow_alias: flow_alias.into(), - pipeline_alias: pipeline_alias.into(), - } - } - - pub(crate) fn pipeline_alias(&self) -> &str { - self.pipeline_alias.as_str() - } -} - -impl std::fmt::Display for Alias { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}::{}", self.flow_alias, self.pipeline_alias) - } -} - /// Address for a pipeline #[derive(Clone)] pub struct Addr { addr: Sender>, cf_addr: UnboundedSender, mgmt_addr: Sender, - alias: Alias, + alias: alias::Pipeline, } impl Addr { @@ -81,7 +52,7 @@ impl Addr { addr: Sender>, cf_addr: UnboundedSender, mgmt_addr: Sender, - alias: Alias, + alias: alias::Pipeline, ) -> Self { Self { addr, @@ -175,7 +146,7 @@ impl TryFrom for OutputTarget { } pub(crate) fn spawn( - pipeline_alias: Alias, + pipeline_alias: alias::Pipeline, config: &tremor_pipeline::query::Query, operator_id_gen: &mut OperatorIdGen, ) -> Result { @@ -415,7 +386,7 @@ async fn send_events(eventset: &mut EventSet, dests: &mut Dests) -> Result<()> { } #[inline] -async fn send_signal(own_id: &Alias, signal: Event, dests: &mut Dests) -> Result<()> { +async fn send_signal(own_id: &alias::Pipeline, signal: Event, dests: &mut Dests) -> Result<()> { let mut destinations = dests.values_mut().flatten(); let first = destinations.next(); for (id, dest) in destinations { @@ -503,7 +474,7 @@ fn maybe_send(r: Result<()>) { /// /// currently only used for printing struct PipelineContext { - alias: Alias, + alias: alias::Pipeline, } impl std::fmt::Display for PipelineContext { @@ -512,23 +483,23 @@ impl std::fmt::Display for PipelineContext { } } -impl 
From<&Alias> for PipelineContext { - fn from(alias: &Alias) -> Self { +impl From<&alias::Pipeline> for PipelineContext { + fn from(alias: &alias::Pipeline) -> Self { Self { alias: alias.clone(), } } } -impl From for PipelineContext { - fn from(alias: Alias) -> Self { +impl From for PipelineContext { + fn from(alias: alias::Pipeline) -> Self { Self { alias } } } #[allow(clippy::too_many_lines)] pub(crate) async fn pipeline_task( - id: Alias, + id: alias::Pipeline, mut pipeline: ExecutableGraph, rx: Receiver>, cf_rx: UnboundedReceiver, @@ -734,17 +705,17 @@ mod tests { let query = tremor_pipeline::query::Query::parse(trickle, &*FN_REGISTRY.read()?, &aggr_reg)?; let addr = spawn( - Alias::new("report", "test-pipe1"), + alias::Pipeline::new("report", "test-pipe1"), &query, &mut operator_id_gen, )?; let addr2 = spawn( - Alias::new("report", "test-pipe2"), + alias::Pipeline::new("report", "test-pipe2"), &query, &mut operator_id_gen, )?; let addr3 = spawn( - Alias::new("report", "test-pipe3"), + alias::Pipeline::new("report", "test-pipe3"), &query, &mut operator_id_gen, )?; @@ -837,7 +808,7 @@ mod tests { let mut operator_id_gen = OperatorIdGen::new(); let trickle = r#"select event from in into out;"#; let aggr_reg = aggr_registry(); - let pipeline_id = Alias::new("flow", "test-pipe"); + let pipeline_id = alias::Pipeline::new("flow", "test-pipe"); let query = tremor_pipeline::query::Query::parse(trickle, &*FN_REGISTRY.read()?, &aggr_reg)?; let addr = spawn(pipeline_id, &query, &mut operator_id_gen)?; diff --git a/src/preprocessor.rs b/src/preprocessor.rs index ce9fb230a9..16fa7d72de 100644 --- a/src/preprocessor.rs +++ b/src/preprocessor.rs @@ -27,7 +27,7 @@ pub(crate) mod prelude { pub use value_trait::Builder; } use self::prelude::*; -use crate::{config::Preprocessor as PreprocessorConfig, connectors::Alias, errors::Result}; +use crate::{alias::Connector, config::Preprocessor as PreprocessorConfig, errors::Result}; //pub type Lines = lines::Lines; @@ -669,8 +669,8 @@ 
mod test { #[test] fn single_pre_process_head_ok() { let pre = Box::new(BadPreprocessor {}); - let alias = crate::connectors::Alias::new( - crate::system::flow::Alias::new("chucky"), + let alias = tremor_common::alias::Connector::new( + tremor_common::alias::Flow::new("chucky"), "chucky".to_string(), ); let mut ingest_ns = 0_u64; @@ -689,8 +689,8 @@ mod test { let noop = Box::new(NoOp {}); assert_eq!("nily", noop.name()); let pre = Box::new(BadPreprocessor {}); - let alias = crate::connectors::Alias::new( - crate::system::flow::Alias::new("chucky"), + let alias = tremor_common::alias::Connector::new( + tremor_common::alias::Flow::new("chucky"), "chucky".to_string(), ); let mut ingest_ns = 0_u64; @@ -707,8 +707,8 @@ mod test { #[test] fn single_pre_finish_ok() { let pre = Box::new(BadPreprocessor {}); - let alias = crate::connectors::Alias::new( - crate::system::flow::Alias::new("chucky"), + let alias = tremor_common::alias::Connector::new( + tremor_common::alias::Flow::new("chucky"), "chucky".to_string(), ); let r = finish(&mut [pre], &alias); @@ -724,8 +724,8 @@ mod test { #[test] fn preprocess_finish_head_fail() { - let alias = crate::connectors::Alias::new( - crate::system::flow::Alias::new("chucky"), + let alias = tremor_common::alias::Connector::new( + tremor_common::alias::Flow::new("chucky"), "chucky".to_string(), ); let pre = Box::new(BadFinisher {}); @@ -735,8 +735,8 @@ mod test { #[test] fn preprocess_finish_tail_fail() { - let alias = crate::connectors::Alias::new( - crate::system::flow::Alias::new("chucky"), + let alias = tremor_common::alias::Connector::new( + tremor_common::alias::Flow::new("chucky"), "chucky".to_string(), ); let noop = Box::new(NoOp {}); @@ -747,8 +747,8 @@ mod test { #[test] fn preprocess_finish_multi_ok() { - let alias = crate::connectors::Alias::new( - crate::system::flow::Alias::new("xyz"), + let alias = tremor_common::alias::Connector::new( + tremor_common::alias::Flow::new("xyz"), "xyz".to_string(), ); let noop1 = Box::new(NoOp 
{}); diff --git a/src/system.rs b/src/system.rs index b79ad28f6e..6ec12629f3 100644 --- a/src/system.rs +++ b/src/system.rs @@ -158,7 +158,7 @@ impl World { /// * if we fail to send the request or fail to receive it pub async fn get_flow(&self, flow_id: String) -> Result { let (flow_tx, flow_rx) = oneshot::channel(); - let flow_id = flow::Alias::new(flow_id); + let flow_id = tremor_common::alias::Flow::new(flow_id); self.system .send(flow_supervisor::Msg::GetFlow(flow_id.clone(), flow_tx)) .await?; diff --git a/src/system/flow.rs b/src/system/flow.rs index 0210bb9d1d..d3dc047960 100644 --- a/src/system/flow.rs +++ b/src/system/flow.rs @@ -32,53 +32,15 @@ use std::collections::HashSet; use std::time::Duration; use tokio::{task, time::timeout}; use tokio_stream::wrappers::ReceiverStream; -use tremor_common::ids::{ConnectorIdGen, OperatorIdGen}; +use tremor_common::{ + alias, + ids::{ConnectorIdGen, OperatorIdGen}, +}; use tremor_script::{ - ast::{self, ConnectStmt, DeployFlow, Helper}, + ast::{self, ConnectStmt, Helper}, errors::{error_generic, not_defined_err}, }; -/// unique identifier of a flow instance within a tremor instance -#[derive(Debug, PartialEq, PartialOrd, Eq, Hash, Clone, Serialize, Deserialize)] -pub struct Alias(String); - -impl Alias { - /// construct a new flow if from some stringy thingy - pub fn new(alias: impl Into) -> Self { - Self(alias.into()) - } - - /// reference this id as a stringy thing again - #[must_use] - pub fn as_str(&self) -> &str { - self.0.as_str() - } -} - -impl From<&DeployFlow<'_>> for Alias { - fn from(f: &DeployFlow) -> Self { - Self(f.instance_alias.to_string()) - } -} - -impl From<&str> for Alias { - fn from(e: &str) -> Self { - Self(e.to_string()) - } -} - -impl From for Alias { - fn from(alias: String) -> Self { - Self(alias) - } -} - -impl std::fmt::Display for Alias { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.0.fmt(f) - } -} - #[derive(Debug)] /// Control Plane message accepted by 
each binding control plane handler pub(crate) enum Msg { @@ -97,7 +59,7 @@ pub(crate) enum Msg { /// The sender expects a Result, which makes it easier to signal errors on the message handling path to the sender Report(Sender>), /// Get the addr for a single connector - GetConnector(connectors::Alias, Sender>), + GetConnector(alias::Connector, Sender>), /// Get the addresses for all connectors of this flow GetConnectors(Sender>>), } @@ -106,7 +68,7 @@ type Addr = Sender; /// A deployed Flow instance #[derive(Debug, Clone)] pub struct Flow { - alias: Alias, + alias: alias::Flow, addr: Addr, } @@ -114,15 +76,15 @@ pub struct Flow { #[derive(Serialize, Deserialize, Debug)] pub struct StatusReport { /// the id of the instance this report describes - pub alias: Alias, + pub alias: alias::Flow, /// the current state pub status: State, /// the crated connectors - pub connectors: Vec, + pub connectors: Vec, } impl Flow { - pub(crate) fn id(&self) -> &Alias { + pub(crate) fn id(&self) -> &alias::Flow { &self.alias } pub(crate) async fn stop(&self, tx: Sender>) -> Result<()> { @@ -147,7 +109,7 @@ impl Flow { /// # Errors /// if the flow is not running anymore and can't be reached or if the connector is not part of the flow pub async fn get_connector(&self, connector_alias: String) -> Result { - let connector_alias = connectors::Alias::new(self.id().clone(), connector_alias); + let connector_alias = alias::Connector::new(self.id().clone(), connector_alias); let (tx, mut rx) = bounded(1); self.addr .send(Msg::GetConnector(connector_alias, tx)) @@ -192,7 +154,7 @@ impl Flow { ) -> Result { let mut pipelines = HashMap::new(); let mut connectors = HashMap::new(); - let flow_alias = Alias::from(&flow); + let flow_alias = alias::Flow::from(&flow); for create in &flow.defn.creates { let alias: &str = &create.instance_alias; @@ -200,7 +162,7 @@ impl Flow { ast::CreateTargetDefinition::Connector(defn) => { let mut defn = defn.clone(); defn.params.ingest_creational_with(&create.with)?; 
- let connector_alias = connectors::Alias::new(flow_alias.clone(), alias); + let connector_alias = alias::Connector::new(flow_alias.clone(), alias); let config = crate::Connector::from_defn(&connector_alias, &defn)?; let builder = known_connectors @@ -228,7 +190,7 @@ impl Flow { defn.to_query(&create.with, &mut helper)? }; - let pipeline_alias = pipeline::Alias::new(flow_alias.clone(), alias); + let pipeline_alias = alias::Pipeline::new(flow_alias.clone(), alias); let pipeline = tremor_pipeline::query::Query( tremor_script::query::Query::from_query(query), ); @@ -394,7 +356,7 @@ async fn link( /// task handling flow instance control plane #[allow(clippy::too_many_lines)] fn spawn_task( - id: Alias, + id: alias::Flow, pipelines: &HashMap, connectors: HashMap, links: &[ConnectStmt], @@ -603,7 +565,7 @@ fn spawn_task( // TODO: aggregate states of all containing instances let connectors = connectors .keys() - .map(|c| connectors::Alias::new(id.clone(), c)) + .map(|c| alias::Connector::new(id.clone(), c)) .collect(); let report = StatusReport { alias: id.clone(), @@ -824,7 +786,7 @@ mod tests { } async fn build( &self, - _alias: &Alias, + _alias: &alias::Connector, _config: &ConnectorConfig, _kill_switch: &KillSwitch, ) -> Result> { diff --git a/src/system/flow_supervisor.rs b/src/system/flow_supervisor.rs index 3b357050c5..fc2feee79e 100644 --- a/src/system/flow_supervisor.rs +++ b/src/system/flow_supervisor.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use super::flow::{Alias, Flow}; +use super::flow::Flow; use super::KillSwitch; use crate::system::DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT; use crate::{ @@ -33,7 +33,10 @@ use tokio::{ task::{self, JoinHandle}, time::timeout, }; -use tremor_common::ids::{ConnectorIdGen, OperatorIdGen}; +use tremor_common::{ + alias, + ids::{ConnectorIdGen, OperatorIdGen}, +}; use tremor_script::ast::DeployFlow; pub(crate) type Channel = Sender; @@ -55,7 +58,7 @@ pub(crate) enum Msg { builder: Box, }, GetFlows(oneshot::Sender>>), - GetFlow(Alias, oneshot::Sender>), + GetFlow(alias::Flow, oneshot::Sender>), /// Initiate the Quiescence process Drain(oneshot::Sender>), /// stop this manager @@ -64,7 +67,7 @@ pub(crate) enum Msg { #[derive(Debug)] pub(crate) struct FlowSupervisor { - flows: HashMap, + flows: HashMap, operator_id_gen: OperatorIdGen, connector_id_gen: ConnectorIdGen, known_connectors: connectors::Known, @@ -96,7 +99,7 @@ impl FlowSupervisor { sender: oneshot::Sender>, kill_switch: &KillSwitch, ) { - let id = Alias::from(&flow); + let id = alias::Flow::from(&flow); let res = match self.flows.entry(id.clone()) { Entry::Occupied(_occupied) => Err(ErrorKind::DuplicateFlow(id.to_string()).into()), Entry::Vacant(vacant) => Flow::start( @@ -123,7 +126,7 @@ impl FlowSupervisor { "Error sending ListFlows response: {e}" ); } - fn handle_get_flow(&self, id: &Alias, reply_tx: oneshot::Sender>) { + fn handle_get_flow(&self, id: &alias::Flow, reply_tx: oneshot::Sender>) { log_error!( reply_tx .send( diff --git a/tremor-api/src/api/model.rs b/tremor-api/src/api/model.rs index ddb3a37e7f..2d2f5c3b18 100644 --- a/tremor-api/src/api/model.rs +++ b/tremor-api/src/api/model.rs @@ -14,16 +14,17 @@ use crate::api::prelude::*; use halfbrown::HashMap; +use tremor_common::alias; use tremor_runtime::{ connectors::{Connectivity, StatusReport as ConnectorStatusReport}, instance::State, - system::flow::{Alias as FlowAlias, StatusReport as FlowStatusReport}, + system::flow::StatusReport as FlowStatusReport, 
}; use tremor_script::ast::DeployEndpoint; #[derive(Clone, Debug, Deserialize, Serialize)] pub(crate) struct ApiFlowStatusReport { - pub(crate) alias: FlowAlias, + pub(crate) alias: alias::Flow, pub(crate) status: State, pub(crate) connectors: Vec, } diff --git a/tremor-cli/Cargo.toml b/tremor-cli/Cargo.toml index dd1d624271..8cdafb8111 100644 --- a/tremor-cli/Cargo.toml +++ b/tremor-cli/Cargo.toml @@ -49,6 +49,7 @@ surf = { version = "=2.3.2", default-features = false, features = [ ] } tremor-api = { version = "0.13.0-rc.16", path = "../tremor-api" } tremor-codec = { version = "0.13.0-rc.16", path = "../tremor-codec" } +tremor-interceptor = { version = "0.13.0-rc.16", path = "../tremor-interceptor" } tremor-common = { version = "0.13.0-rc.16", path = "../tremor-common" } tremor-pipeline = { version = "0.13.0-rc.16", path = "../tremor-pipeline" } tremor-runtime = { version = "0.13.0-rc.16", path = "../" } diff --git a/tremor-cli/src/errors.rs b/tremor-cli/src/errors.rs index bdbd0df4dc..a5849e08d2 100644 --- a/tremor-cli/src/errors.rs +++ b/tremor-cli/src/errors.rs @@ -44,6 +44,7 @@ error_chain! 
{ Pipeline(tremor_pipeline::errors::Error, tremor_pipeline::errors::ErrorKind); Runtime(tremor_runtime::errors::Error, tremor_runtime::errors::ErrorKind); Codec(tremor_codec::errors::Error, tremor_codec::errors::ErrorKind); + Interceptor(tremor_interceptor::errors::Error, tremor_interceptor::errors::ErrorKind); } foreign_links { Value(tremor_value::Error); diff --git a/tremor-cli/src/run.rs b/tremor-cli/src/run.rs index 4665c50c48..0d825dd0c6 100644 --- a/tremor-cli/src/run.rs +++ b/tremor-cli/src/run.rs @@ -18,7 +18,7 @@ use crate::errors::Result; use crate::util::{get_source_kind, highlight, slurp_string, SourceKind}; use futures::executor::block_on; use std::io::prelude::*; -use std::io::{self, BufReader, BufWriter, Read, Write}; +use std::io::{self, BufReader, BufWriter}; use tremor_codec::Codec; use tremor_common::{ file, @@ -26,22 +26,16 @@ use tremor_common::{ ports::{Port, IN}, time::nanotime, }; +use tremor_interceptor::{postprocessor, preprocessor}; use tremor_pipeline::{Event, EventId}; -use tremor_runtime::{ - config, - postprocessor::Postprocessor, - preprocessor::Preprocessor, - system::{World, WorldConfig}, -}; +use tremor_runtime::system::{World, WorldConfig}; use tremor_script::{ arena::Arena, - ctx::EventContext, highlighter::{Error as HighlighterError, Highlighter, Term as TermHighlighter}, lexer::Lexer, prelude::*, query::Query, script::{AggrType, Return, Script}, - EventPayload, ValueAndMeta, }; use tremor_value::Value; @@ -50,7 +44,7 @@ struct Ingress { is_pretty: bool, buf: [u8; 4096], buffer: Box, - preprocessor: Box, + preprocessor: Box, codec: Box, } @@ -65,14 +59,14 @@ impl Ingress { Box::new(BufReader::new(crate::open_file(&cmd.infile, None)?)) }; - let codec = tremor_codec::resolve(&config::Codec::from(&cmd.decoder)); + let codec = tremor_codec::resolve(&tremor_codec::Config::from(&cmd.decoder)); if let Err(_e) = codec { eprintln!("Error Codec {} not found error.", cmd.decoder); // ALLOW: main.rs std::process::exit(1); } let codec = 
codec?; - let preprocessor = tremor_runtime::preprocessor::lookup(&cmd.preprocessor); + let preprocessor = preprocessor::lookup(&cmd.preprocessor); if let Err(_e) = preprocessor { eprintln!("Error Preprocessor {} not found error.", cmd.preprocessor); // ALLOW: main.rs @@ -143,7 +137,7 @@ struct Egress { is_pretty: bool, buffer: Box, codec: Box, - postprocessor: Box, + postprocessor: Box, } impl Egress { @@ -154,7 +148,7 @@ impl Egress { Box::new(BufWriter::new(file::create(&cmd.outfile)?)) }; - let codec = tremor_codec::resolve(&config::Codec::from(&cmd.encoder)); + let codec = tremor_codec::resolve(&tremor_codec::Config::from(&cmd.encoder)); if let Err(_e) = codec { eprintln!("Error Codec {} not found error.", cmd.encoder); // ALLOW: main.rs @@ -162,7 +156,7 @@ impl Egress { } let codec = codec?; - let postprocessor = tremor_runtime::postprocessor::lookup(&cmd.postprocessor); + let postprocessor = postprocessor::lookup(&cmd.postprocessor); if let Err(_e) = postprocessor { eprintln!("Error Postprocessor {} not found error.", cmd.postprocessor); // ALLOW: main.rs diff --git a/tremor-codec/Cargo.toml b/tremor-codec/Cargo.toml index cd90a44631..d4f22cac81 100644 --- a/tremor-codec/Cargo.toml +++ b/tremor-codec/Cargo.toml @@ -13,6 +13,7 @@ version = "0.13.0-rc.16" [dependencies] tremor-value = { version = "0.13.0-rc.16", path = "../tremor-value" } tremor-common = { version = "0.13.0-rc.16", path = "../tremor-common" } +tremor-config = { version = "0.13.0-rc.16", path = "../tremor-config" } tokio = { version = "1.32", features = ["full"] } async-trait = "0.1" error-chain = "0.12" @@ -21,7 +22,6 @@ simd-json-derive = "0.11" value-trait = "0.6" beef = "0.5" test-case = "3.1" -proptest = "1.1" byteorder = "1.5" itoa = "1" ryu = "1" @@ -61,3 +61,6 @@ serde = "1" rmp-serde = "1.1" syslog_loose = "0.19" serde_yaml = "0.9" + +[dev-dependencies] +proptest = "1.1" diff --git a/tremor-codec/src/lib.rs b/tremor-codec/src/lib.rs index fced2d3e0e..7f58983b01 100644 --- 
a/tremor-codec/src/lib.rs +++ b/tremor-codec/src/lib.rs @@ -50,8 +50,6 @@ mod codec { pub(crate) mod yaml; } -/// Codec configuration -pub mod config; pub use codec::*; mod prelude { @@ -61,6 +59,9 @@ mod prelude { pub use tremor_value::{literal, Object, Value}; } +/// A Codec +pub type Config = tremor_config::NameWithConfig; + #[async_trait::async_trait] /// The codec trait, to encode and decode data pub trait Codec: Send + Sync { @@ -118,7 +119,7 @@ impl Debug for dyn Codec { /// /// # Errors /// * if the codec doesn't exist -pub fn resolve(config: &config::Codec) -> Result> { +pub fn resolve(config: &Config) -> Result> { match config.name.as_str() { "avro" => avro::Avro::from_config(config.config.as_ref()), "kafka-schema-registry" => kafka_schema_registry::Ksr::from_config(config.config.as_ref()), @@ -141,7 +142,7 @@ pub fn resolve(config: &config::Codec) -> Result> { #[cfg(test)] mod test { - use crate::config::NameWithConfig; + use tremor_config::NameWithConfig; use tremor_value::literal; #[test] diff --git a/tremor-common/src/alias.rs b/tremor-common/src/alias.rs new file mode 100644 index 0000000000..8dbd46a4b2 --- /dev/null +++ b/tremor-common/src/alias.rs @@ -0,0 +1,114 @@ +// Copyright 2020-2021, The Tremor Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use serde::{Deserialize, Serialize}; + +/// unique identifier of a flow instance within a tremor instance +#[derive(Debug, PartialEq, PartialOrd, Eq, Hash, Clone, Serialize, Deserialize)] +pub struct Flow(String); + +impl Flow { + /// construct a new flow id from some stringy thingy + pub fn new(alias: impl Into) -> Self { + Self(alias.into()) + } + + /// reference this id as a stringy thing again + #[must_use] + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + +impl From<&str> for Flow { + fn from(e: &str) -> Self { + Self(e.to_string()) + } +} + +impl From for Flow { + fn from(alias: String) -> Self { + Self(alias) + } +} + +impl std::fmt::Display for Flow { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +/// unique instance alias/id of a connector within a deployment +#[derive(Debug, PartialEq, PartialOrd, Eq, Hash, Clone, Serialize, Deserialize)] +pub struct Connector { + flow_alias: Flow, + connector_alias: String, +} + +impl Connector { + /// construct a new `Connector` from the id of the containing flow and the connector instance id + pub fn new(flow_alias: impl Into, connector_alias: impl Into) -> Self { + Self { + flow_alias: flow_alias.into(), + connector_alias: connector_alias.into(), + } + } + + /// get a reference to the flow alias + #[must_use] + pub fn flow_alias(&self) -> &Flow { + &self.flow_alias + } + + /// get a reference to the connector alias + #[must_use] + pub fn connector_alias(&self) -> &str { + self.connector_alias.as_str() + } +} + +impl std::fmt::Display for Connector { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}::{}", self.flow_alias, self.connector_alias) + } +} + +/// unique instance alias/id of a pipeline within a deployment +#[derive(Debug, PartialEq, PartialOrd, Eq, Hash, Clone, Serialize, Deserialize)] +pub struct Pipeline { + flow_alias: Flow, + pipeline_alias: String, +} + +impl Pipeline { + /// construct a new `Pipeline`
from the id of the containing flow and the pipeline instance id + pub fn new(flow_alias: impl Into, pipeline_alias: impl Into) -> Self { + Self { + flow_alias: flow_alias.into(), + pipeline_alias: pipeline_alias.into(), + } + } + + /// get a reference to the Pipeline alias + #[must_use] + pub fn pipeline_alias(&self) -> &str { + self.pipeline_alias.as_str() + } +} + +impl std::fmt::Display for Pipeline { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}::{}", self.flow_alias, self.pipeline_alias) + } +} diff --git a/tremor-common/src/base64.rs b/tremor-common/src/base64.rs index aeb0101a22..d76d288a62 100644 --- a/tremor-common/src/base64.rs +++ b/tremor-common/src/base64.rs @@ -13,6 +13,7 @@ // limitations under the License. use base64::alphabet::STANDARD as STANDARD_ALPHABET; use base64::engine::{DecodePaddingMode, GeneralPurpose, GeneralPurposeConfig}; +pub use base64::{DecodeError, Engine}; /** * Our very own base64 engine, that produces base64 with padding, but accepts base64 with and without padding. 
diff --git a/tremor-common/src/lib.rs b/tremor-common/src/lib.rs index 3a61e44163..1eea6c2996 100644 --- a/tremor-common/src/lib.rs +++ b/tremor-common/src/lib.rs @@ -51,8 +51,22 @@ pub mod base64; /// URL with defaults pub mod url; +/// Aliases for naming tremor elements +pub mod alias; + pub use errors::Error; +/// function that always returns true +#[must_use] +pub fn default_true() -> bool { + true +} +/// function that always returns false +#[must_use] +pub fn default_false() -> bool { + false +} + #[cfg(test)] mod tests { #[test] diff --git a/tremor-config/Cargo.toml b/tremor-config/Cargo.toml new file mode 100644 index 0000000000..c9e993aac2 --- /dev/null +++ b/tremor-config/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "tremor-config" +edition = "2021" +authors = ["The Tremor Team"] +description = "Tremor Script Interpreter" +documentation = "https://docs.rs/tremor-script" +license = "Apache-2.0" +readme = "README.md" +repository = "https://github.com/tremor-rs/tremor-runtime" +version = "0.13.0-rc.16" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +tremor-value = { path = "../tremor-value" } +serde = "1" +simd-json = "0.11" diff --git a/tremor-codec/src/config.rs b/tremor-config/src/lib.rs similarity index 80% rename from tremor-codec/src/config.rs rename to tremor-config/src/lib.rs index b31af5222b..92c3e0c2db 100644 --- a/tremor-codec/src/config.rs +++ b/tremor-config/src/lib.rs @@ -1,7 +1,7 @@ // Copyright 2020-2021, The Tremor Team // // Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. +// you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License.
-use crate::errors::Result; use serde::Deserialize; use tremor_value::prelude::*; @@ -60,9 +59,9 @@ impl<'v> serde::Deserialize<'v> for NameWithConfig { } impl<'v> TryFrom<&Value<'v>> for NameWithConfig { - type Error = crate::errors::Error; + type Error = Error; - fn try_from(value: &Value) -> Result { + fn try_from(value: &Value) -> Result { if let Some(name) = value.as_str() { Ok(Self::from(name)) } else if let Some(name) = value.get_str("name") { @@ -71,11 +70,27 @@ impl<'v> TryFrom<&Value<'v>> for NameWithConfig { config: value.get("config").map(Value::clone_static), }) } else { - Err(format!("Invalid codec: {value}").into()) + Err(Error::InvalidConfig(value.encode())) } } } +/// Error for config +#[derive(Debug, Clone, PartialEq)] +pub enum Error { + InvalidConfig(String), +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::result::Result<(), std::fmt::Error> { + match self { + Self::InvalidConfig(v) => write!(f, "Invalid config: {v}"), + } + } +} + +impl std::error::Error for Error {} + impl From<&str> for NameWithConfig { fn from(name: &str) -> Self { Self { @@ -95,12 +110,23 @@ impl From for NameWithConfig { } } -/// A Codec -pub type Codec = NameWithConfig; -/// A Preprocessor -pub type Preprocessor = NameWithConfig; -/// A Postprocessor -pub type Postprocessor = NameWithConfig; +/// Trait for detecting errors in config and the key names are included in errors +pub trait Impl { + /// deserialises the config into a struct and returns nice errors + /// this doesn't need to be overwritten in most cases.
+ /// + /// # Errors + /// if the Configuration is invalid + fn new(config: &tremor_value::Value) -> Result + where + Self: serde::de::Deserialize<'static>, + { + tremor_value::structurize(config.clone_static()) + } +} + +/// A configuration map +pub type Map = Option>; #[cfg(test)] mod test { diff --git a/tremor-interceptor/Cargo.toml b/tremor-interceptor/Cargo.toml new file mode 100644 index 0000000000..3f5ffb56cd --- /dev/null +++ b/tremor-interceptor/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "tremor-interceptor" +edition = "2021" +authors = ["The Tremor Team"] +description = "Tremor Runtime" +documentation = "https://docs.tremor.rs" +homepage = "https://www.tremor.rs" +license = "Apache-2.0" +readme = "README.md" +repository = "https://github.com/tremor-rs/tremor-runtime" +version = "0.13.0-rc.16" +rust-version = "1.62" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +error-chain = "0.12" +tremor-value = { path = "../tremor-value" } +tremor-config = { path = "../tremor-config" } +tremor-common = { path = "../tremor-common" } +log = "0.4" +serde = { version = "1", features = ["derive"] } +simd-json = "0.11" +libflate = "2" +xz2 = "0.1" +lz4 = "1" +snap = "1" +zstd = "0.13" +byteorder = "1" +value-trait = "0.6" +rand = "0.8" +bytes = "1.5.0" +memchr = "2.6" + +[dev-dependencies] +proptest = "1.1" diff --git a/tremor-interceptor/src/errors.rs b/tremor-interceptor/src/errors.rs new file mode 100644 index 0000000000..ae1aad0af6 --- /dev/null +++ b/tremor-interceptor/src/errors.rs @@ -0,0 +1,66 @@ +// Copyright 2020-2021, The Tremor Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//NOTE: error_chain +#![allow(deprecated, missing_docs, clippy::large_enum_variant)] + +use error_chain::error_chain; +// use value_trait::prelude::*; + +#[cfg(test)] +impl PartialEq for Error { + fn eq(&self, _other: &Self) -> bool { + // This might be Ok since we try to compare Result in tests + false + } +} + +error_chain! { + links { + } + foreign_links { + Base64Error(tremor_common::base64::DecodeError); + Config(tremor_config::Error); + Io(std::io::Error); + ParseIntError(std::num::ParseIntError); + SnappyError(snap::Error); + TryFromIntError(std::num::TryFromIntError); + Utf8Error(std::str::Utf8Error); + ValueError(tremor_value::Error); + + } + + errors { + + InvalidGelfHeader(len: usize, initial: Option<[u8; 2]>) { + description("Invalid GELF header") + display("Invalid GELF header len: {}, prefix: {:?}", len, initial) + } + + MissingConfiguration(s: String) { + description("Missing Configuration") + display("Missing Configuration for {}", s) + } + + InvalidConfiguration(configured_thing: String, msg: String) { + description("Invalid Configuration") + display("Invalid Configuration for {}: {}", configured_thing, msg) + } + + InvalidInputData(msg: &'static str) { + description("Invalid Input data") + display("Invalid Input data: {}", msg) + } + } +} diff --git a/tremor-interceptor/src/lib.rs b/tremor-interceptor/src/lib.rs new file mode 100644 index 0000000000..955e572bee --- /dev/null +++ b/tremor-interceptor/src/lib.rs @@ -0,0 +1,35 @@ +// Copyright 2020-2021, The Tremor Team +// +// Licensed under the Apache License, Version 2.0 (the 
"License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Tremor interceptors (pre and postprocessors) + +#![deny(warnings)] +#![deny(missing_docs)] +#![recursion_limit = "1024"] +#![deny( + clippy::all, + clippy::unwrap_used, + clippy::unnecessary_unwrap, + clippy::pedantic, + clippy::mod_module_files +)] + +/// Postprocessors +pub mod postprocessor; + +/// Preprocessors +pub mod preprocessor; + +/// Errors +pub mod errors; diff --git a/src/postprocessor.rs b/tremor-interceptor/src/postprocessor.rs similarity index 96% rename from src/postprocessor.rs rename to tremor-interceptor/src/postprocessor.rs index a9054fd998..7ee6660540 100644 --- a/src/postprocessor.rs +++ b/tremor-interceptor/src/postprocessor.rs @@ -21,14 +21,17 @@ pub(crate) mod length_prefixed; pub(crate) mod separate; pub(crate) mod textual_length_prefixed; -use crate::config::Postprocessor as PostprocessorConfig; use crate::errors::Result; +use log::error; use std::default::Default; use tremor_common::time::nanotime; /// Set of Postprocessors pub type Postprocessors = Vec>; use std::{mem, str}; +/// Configuration for a postprocessor +pub type Config = tremor_config::NameWithConfig; + trait PostprocessorState {} /// Postprocessor trait pub trait Postprocessor: Send + Sync { @@ -59,7 +62,7 @@ pub trait Postprocessor: Send + Sync { /// /// * Errors if the postprocessor is not known -pub fn lookup_with_config(config: &PostprocessorConfig) -> Result> { +pub fn lookup_with_config(config: &Config) -> Result> { match config.name.as_str() { "chunk" => 
Ok(Box::new(chunk::Chunk::from_config(config.config.as_ref())?)), "compress" => Ok(Box::new(compress::Compress::from_config( @@ -83,7 +86,7 @@ pub fn lookup_with_config(config: &PostprocessorConfig) -> Result Result> { - lookup_with_config(&PostprocessorConfig::from(name)) + lookup_with_config(&Config::from(name)) } /// Given the slice of postprocessor names: Lookup each of them and return them as `Postprocessors` @@ -91,7 +94,7 @@ pub fn lookup(name: &str) -> Result> { /// # Errors /// /// * If any postprocessor is not known. -pub fn make_postprocessors(postprocessors: &[PostprocessorConfig]) -> Result { +pub fn make_postprocessors(postprocessors: &[Config]) -> Result { postprocessors.iter().map(lookup_with_config).collect() } @@ -167,7 +170,7 @@ pub fn finish(postprocessors: &mut [Box], alias: &str) -> Res #[cfg(test)] mod test { use super::*; - use crate::config::NameWithConfig; + use tremor_config::NameWithConfig; use tremor_value::literal; const LOOKUP_TABLE: [&str; 6] = [ diff --git a/src/postprocessor/base64.rs b/tremor-interceptor/src/postprocessor/base64.rs similarity index 92% rename from src/postprocessor/base64.rs rename to tremor-interceptor/src/postprocessor/base64.rs index 6e052e09b0..920e38a02d 100644 --- a/src/postprocessor/base64.rs +++ b/tremor-interceptor/src/postprocessor/base64.rs @@ -15,9 +15,8 @@ //! Encodes raw data into base64 encoded bytes. use super::Postprocessor; -use crate::Result; -use base64::Engine; -use tremor_common::base64::BASE64; +use crate::errors::Result; +use tremor_common::base64::{Engine, BASE64}; #[derive(Default)] pub(crate) struct Base64 {} diff --git a/src/postprocessor/chunk.rs b/tremor-interceptor/src/postprocessor/chunk.rs similarity index 99% rename from src/postprocessor/chunk.rs rename to tremor-interceptor/src/postprocessor/chunk.rs index fb8dc39035..d6573adcef 100644 --- a/src/postprocessor/chunk.rs +++ b/tremor-interceptor/src/postprocessor/chunk.rs @@ -47,7 +47,9 @@ //! 
``` use super::Postprocessor; -use crate::errors::{Error, Kind as ErrorKind, Result}; +use crate::errors::{Error, ErrorKind, Result}; +use log::warn; +use serde::Deserialize; use tremor_value::Value; #[derive(Deserialize, Debug, Clone)] diff --git a/src/postprocessor/compress.rs b/tremor-interceptor/src/postprocessor/compress.rs similarity index 100% rename from src/postprocessor/compress.rs rename to tremor-interceptor/src/postprocessor/compress.rs diff --git a/src/postprocessor/gelf_chunking.rs b/tremor-interceptor/src/postprocessor/gelf_chunking.rs similarity index 100% rename from src/postprocessor/gelf_chunking.rs rename to tremor-interceptor/src/postprocessor/gelf_chunking.rs diff --git a/src/postprocessor/ingest_ns.rs b/tremor-interceptor/src/postprocessor/ingest_ns.rs similarity index 97% rename from src/postprocessor/ingest_ns.rs rename to tremor-interceptor/src/postprocessor/ingest_ns.rs index 4edd191ab6..c97a65d145 100644 --- a/src/postprocessor/ingest_ns.rs +++ b/tremor-interceptor/src/postprocessor/ingest_ns.rs @@ -15,7 +15,7 @@ //! Prepends the event ingest timestamp as an unsigned 64 bit big-endian integer before the evetn payload. 
use super::Postprocessor; -use crate::Result; +use crate::errors::Result; use byteorder::{BigEndian, WriteBytesExt}; use std::io::Write; diff --git a/src/postprocessor/length_prefixed.rs b/tremor-interceptor/src/postprocessor/length_prefixed.rs similarity index 97% rename from src/postprocessor/length_prefixed.rs rename to tremor-interceptor/src/postprocessor/length_prefixed.rs index caf7831bc5..601bdeb2de 100644 --- a/src/postprocessor/length_prefixed.rs +++ b/tremor-interceptor/src/postprocessor/length_prefixed.rs @@ -19,7 +19,7 @@ use std::io::Write; use byteorder::{BigEndian, WriteBytesExt}; use super::Postprocessor; -use crate::Result; +use crate::errors::Result; #[derive(Clone, Default)] pub(crate) struct LengthPrefixed {} diff --git a/src/postprocessor/separate.rs b/tremor-interceptor/src/postprocessor/separate.rs similarity index 96% rename from src/postprocessor/separate.rs rename to tremor-interceptor/src/postprocessor/separate.rs index 5c0b34903e..b023e571ec 100644 --- a/src/postprocessor/separate.rs +++ b/tremor-interceptor/src/postprocessor/separate.rs @@ -19,9 +19,10 @@ //! 
| `separator` | The separator to append after each event's byte stream | no | `\n` | use super::Postprocessor; -use crate::errors::{Kind as ErrorKind, Result}; +use crate::errors::{ErrorKind, Result}; use crate::preprocessor::separate::{default_separator, DEFAULT_SEPARATOR}; -use tremor_pipeline::{ConfigImpl, ConfigMap}; +use serde::Deserialize; +use tremor_config::{Impl as ConfigImpl, Map as ConfigMap}; #[derive(Clone, Debug, Deserialize)] #[serde(deny_unknown_fields)] @@ -30,7 +31,7 @@ pub struct Config { separator: String, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} pub(crate) struct Separate { separator: u8, diff --git a/src/postprocessor/textual_length_prefixed.rs b/tremor-interceptor/src/postprocessor/textual_length_prefixed.rs similarity index 97% rename from src/postprocessor/textual_length_prefixed.rs rename to tremor-interceptor/src/postprocessor/textual_length_prefixed.rs index 78020860ee..5ba60c31a1 100644 --- a/src/postprocessor/textual_length_prefixed.rs +++ b/tremor-interceptor/src/postprocessor/textual_length_prefixed.rs @@ -15,7 +15,7 @@ //! Prefixes the data with the length of the event data in bytes as an unsigned 64 bit big-endian integer. use super::Postprocessor; -use crate::Result; +use crate::errors::Result; use std::io::Write; #[derive(Clone, Default)] diff --git a/tremor-interceptor/src/preprocessor.rs b/tremor-interceptor/src/preprocessor.rs new file mode 100644 index 0000000000..47b882f6c5 --- /dev/null +++ b/tremor-interceptor/src/preprocessor.rs @@ -0,0 +1,766 @@ +// Copyright 2020-2021, The Tremor Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod base64; +mod decompress; +pub(crate) mod gelf_chunking; +mod ingest_ns; +mod length_prefixed; +mod remove_empty; +pub(crate) mod separate; +mod textual_length_prefixed; +pub(crate) mod prelude { + pub use super::Preprocessor; + pub use crate::errors::Result; + pub use tremor_common::{default_false, default_true}; + pub use tremor_value::Value; + pub use value_trait::Builder; +} +use self::prelude::*; +use crate::errors::Result; +use log::error; +use tremor_common::alias::Connector as Alias; + +/// Configuration for a preprocessor +pub type Config = tremor_config::NameWithConfig; + +//pub type Lines = lines::Lines; + +/// A set of preprocessors +pub type Preprocessors = Vec>; + +/// Preprocessor trait +pub trait Preprocessor: Sync + Send { + /// Canonical name for this preprocessor + fn name(&self) -> &str; + /// process data + /// + /// # Errors + /// + /// * Errors if the data cannot be processed + fn process( + &mut self, + ingest_ns: &mut u64, + data: &[u8], + meta: Value<'static>, + ) -> Result, Value<'static>)>>; + + /// Finish processing data and emit anything that might be left. + /// Takes a `data` buffer of input data, that is potentially empty, + /// especially if this is the first preprocessor in a chain.
+ /// + /// # Errors + /// + /// * If finishing fails + fn finish( + &mut self, + _data: Option<&[u8]>, + _meta: Option>, + ) -> Result, Value<'static>)>> { + Ok(vec![]) + } +} + +/// Lookup a preprocessor implementation via its configuration +/// +/// # Errors +/// +/// * Errors if the preprocessor is not known +pub fn lookup_with_config(config: &Config) -> Result> { + match config.name.as_str() { + "separate" => Ok(Box::new(separate::Separate::from_config(&config.config)?)), + "base64" => Ok(Box::::default()), + "decompress" => Ok(Box::new(decompress::Decompress::from_config( + config.config.as_ref(), + )?)), + "remove-empty" => Ok(Box::::default()), + "gelf-chunking" => Ok(Box::::default()), + "ingest-ns" => Ok(Box::::default()), + "length-prefixed" => Ok(Box::::default()), + "textual-length-prefixed" => { + Ok(Box::::default()) + } + name => Err(format!("Preprocessor '{name}' not found.").into()), + } +} + +/// Lookup a preprocessor implementation via its unique id +/// +/// # Errors +/// +/// * if the preprocessor with `name` is not known +pub fn lookup(name: &str) -> Result> { + lookup_with_config(&Config::from(name)) +} + +/// Given the slice of preprocessor names: Look them up and return them as `Preprocessors`. +/// +/// # Errors +/// +/// * If the preprocessor is not known. +pub fn make_preprocessors(preprocessors: &[Config]) -> Result { + preprocessors.iter().map(lookup_with_config).collect() +} + +/// Canonical way to preprocess data before it is fed to a codec for decoding. +/// +/// Preprocessors might split up the given data into multiple chunks. Each of those +/// chunks must be separately decoded by a `Codec`.
+/// +/// # Errors +/// +/// * If a preprocessor failed +pub fn preprocess( + preprocessors: &mut [Box], + ingest_ns: &mut u64, + data: Vec, + meta: Value<'static>, + alias: &Alias, +) -> Result, Value<'static>)>> { + let mut data = vec![(data, meta)]; + let mut data1 = Vec::new(); + for pp in preprocessors { + for (i, (d, m)) in data.drain(..).enumerate() { + match pp.process(ingest_ns, &d, m) { + Ok(mut r) => data1.append(&mut r), + Err(e) => { + error!("[Connector::{alias}] Preprocessor [{i}] error: {e}"); + return Err(e); + } + } + } + std::mem::swap(&mut data, &mut data1); + } + Ok(data) +} + +/// Canonical way to finish preprocessors up +/// +/// # Errors +/// +/// * If a preprocessor failed +pub fn finish( + preprocessors: &mut [Box], + alias: &Alias, +) -> Result, Value<'static>)>> { + if let Some((head, tail)) = preprocessors.split_first_mut() { + let mut data = match head.finish(None, None) { + Ok(d) => d, + Err(e) => { + error!( + "[Connector::{alias}] Preprocessor '{}' finish error: {e}", + head.name() + ); + return Err(e); + } + }; + let mut data1 = Vec::new(); + for pp in tail { + for (d, m) in data.drain(..) { + match pp.finish(Some(&d), Some(m)) { + Ok(mut r) => data1.append(&mut r), + Err(e) => { + error!( + "[Connector::{alias}] Preprocessor '{}' finish error: {e}", + pp.name() + ); + return Err(e); + } + } + } + std::mem::swap(&mut data, &mut data1); + } + Ok(data) + } else { + Ok(vec![]) + } +} + +#[cfg(test)] +mod test { + #![allow(clippy::ignored_unit_patterns)] + use super::*; + use crate::errors::Result; + use crate::postprocessor::{self as post, separate::Separate as SeparatePost, Postprocessor}; + + #[test] + fn ingest_ts() -> Result<()> { + let mut pre_p = ingest_ns::ExtractIngestTs {}; + let mut post_p = post::ingest_ns::IngestNs {}; + + let data = vec![1_u8, 2, 3]; + + let encoded = post_p.process(42, 23, &data)?.pop().ok_or("no data")?; + + let mut in_ns = 0u64; + let decoded = pre_p + .process(&mut in_ns, &encoded, Value::object())? 
+ .pop() + .ok_or("no data")? + .0; + + assert!(pre_p.finish(None, None)?.is_empty()); + + assert_eq!(data, decoded); + assert_eq!(in_ns, 42); + + // data too short + assert!(pre_p.process(&mut in_ns, &[0_u8], Value::object()).is_err()); + Ok(()) + } + + fn textual_prefix(len: usize) -> String { + format!("{len} {}", String::from_utf8_lossy(&vec![b'O'; len])) + } + + use proptest::prelude::*; + + // generate multiple chopped length-prefixed strings + fn multiple_textual_lengths(max_elements: usize) -> BoxedStrategy<(Vec, Vec)> { + proptest::collection::vec(".+", 1..max_elements) // generator for Vec of arbitrary strings, maximum length of vector: `max_elements` + .prop_map(|ss| { + let s: (Vec, Vec) = ss + .into_iter() + .map(|s| (s.len(), format!("{} {s}", s.len()))) // for each string, extract the length, and create a textual length prefix + .unzip(); + s + }) + .prop_map(|tuple| (tuple.0, tuple.1.join(""))) // generator for a tuple of 1. the sizes of the length prefixed strings, 2. the concatenated length prefixed strings as one giant string + .prop_map(|tuple| { + // here we chop the big string into up to 4 bits + let mut chopped = Vec::with_capacity(4); + let mut giant_string: String = tuple.1.clone(); + while !giant_string.is_empty() && chopped.len() < 4 { + // verify we are at a char boundary + let mut indices = giant_string.char_indices(); + let num_chars = giant_string.chars().count(); + if let Some((index, _)) = indices.nth(num_chars / 2) { + let mut splitted = giant_string.split_off(index); + std::mem::swap(&mut splitted, &mut giant_string); + chopped.push(splitted); + } else { + break; + } + } + chopped.push(giant_string); + (tuple.0, chopped) + }) + .boxed() + } + + proptest! 
{ + #[test] + fn textual_length_prefix_prop((lengths, datas) in multiple_textual_lengths(5)) { + let mut pre_p = textual_length_prefixed::TextualLengthPrefixed::default(); + let mut in_ns = 0_u64; + let res: Vec<_> = datas.into_iter().flat_map(|data| { + pre_p.process(&mut in_ns, data.as_bytes(), Value::object()).unwrap_or_default() + }).collect(); + assert_eq!(lengths.len(), res.len()); + for (processed, expected_len) in res.iter().zip(lengths) { + assert_eq!(expected_len, processed.0.len()); + } + } + + #[test] + fn textual_length_pre_post(length in 1..100_usize) { + let data = vec![1_u8; length]; + let mut pre_p = textual_length_prefixed::TextualLengthPrefixed::default(); + let mut post_p = post::textual_length_prefixed::TextualLengthPrefixed::default(); + let encoded = post_p.process(0, 0, &data).unwrap_or_default().pop().unwrap_or_default(); + let mut in_ns = 0_u64; + let mut res = pre_p.process(&mut in_ns, &encoded, Value::object()).unwrap_or_default(); + assert_eq!(1, res.len()); + let payload = res.pop().unwrap_or_default().0; + assert_eq!(length, payload.len()); + } + } + + #[test] + fn textual_prefix_length_loop() { + let datas = vec![ + "24 \'?\u{d617e}ѨR\u{202e}\u{f8f7c}\u{ede29}\u{ac784}36 ?{¥?MȺ\r\u{bac41}9\u{5bbbb}\r\u{1c46c}\u{4ba79}¥\u{7f}*?:\u{0}$i", + "60 %\u{a825a}\u{a4269}\u{39e0c}\u{b3e21}<ì\u{f6c20}ѨÛ`HW\u{9523f}V", + "\u{3}\u{605fe}%Fq\u{89b5e}\u{93780}Q3", + "¥?\u{feff}9", + " \'�2\u{4269b}", + ]; + let lengths: Vec = vec![24, 36, 60, 9]; + let mut pre_p = textual_length_prefixed::TextualLengthPrefixed::default(); + let mut in_ns = 0_u64; + let res: Vec<_> = datas + .into_iter() + .flat_map(|data| { + pre_p + .process(&mut in_ns, data.as_bytes(), Value::object()) + .unwrap_or_default() + }) + .collect(); + assert_eq!(lengths.len(), res.len()); + for (processed, expected_len) in res.iter().zip(lengths) { + assert_eq!(expected_len, processed.0.len()); + } + } + + #[test] + fn textual_length_prefix() { + let mut pre_p = 
textual_length_prefixed::TextualLengthPrefixed::default(); + let data = textual_prefix(42); + let mut in_ns = 0_u64; + let mut res = pre_p + .process(&mut in_ns, data.as_bytes(), Value::object()) + .unwrap_or_default(); + assert_eq!(1, res.len()); + let payload = res.pop().unwrap_or_default().0; + assert_eq!(42, payload.len()); + } + + #[test] + fn empty_textual_prefix() { + let data = ("").as_bytes(); + let mut pre_p = textual_length_prefixed::TextualLengthPrefixed::default(); + let mut post_p = post::textual_length_prefixed::TextualLengthPrefixed::default(); + let mut in_ns = 0_u64; + let res = pre_p + .process(&mut in_ns, data, Value::object()) + .unwrap_or_default(); + assert_eq!(0, res.len()); + + let data_empty = vec![]; + let encoded = post_p + .process(42, 23, &data_empty) + .unwrap_or_default() + .pop() + .unwrap_or_default(); + assert_eq!("0 ", String::from_utf8_lossy(&encoded)); + let mut res2 = pre_p + .process(&mut in_ns, &encoded, Value::object()) + .unwrap_or_default(); + assert_eq!(1, res2.len()); + let payload = res2.pop().unwrap_or_default().0; + assert_eq!(0, payload.len()); + } + + #[test] + fn length_prefix() -> Result<()> { + let mut it = 0; + + let pre_p = length_prefixed::LengthPrefixed::default(); + let mut post_p = post::length_prefixed::LengthPrefixed::default(); + + let data = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; + let wire = post_p.process(0, 0, &data)?; + let (start, end) = wire[0].split_at(7); + let alias = Alias::new("test", "test"); + let mut pps: Vec> = vec![Box::new(pre_p)]; + let recv = preprocess( + pps.as_mut_slice(), + &mut it, + start.to_vec(), + Value::object(), + &alias, + )?; + assert!(recv.is_empty()); + let recv = preprocess( + pps.as_mut_slice(), + &mut it, + end.to_vec(), + Value::object(), + &alias, + )?; + assert_eq!(recv[0].0, data); + + // incomplete data + let processed = preprocess( + pps.as_mut_slice(), + &mut it, + start.to_vec(), + Value::object(), + &alias, + )?; + assert!(processed.is_empty()); + // not 
emitted upon finish + let finished = finish(pps.as_mut_slice(), &alias)?; + assert!(finished.is_empty()); + + Ok(()) + } + + const LOOKUP_TABLE: [&str; 8] = [ + "separate", + "base64", + "decompress", + "remove-empty", + "gelf-chunking", + "ingest-ns", + "length-prefixed", + "textual-length-prefixed", + ]; + + #[test] + fn test_lookup() { + for t in &LOOKUP_TABLE { + assert!(lookup(t).is_ok()); + } + let t = "snot"; + assert!(lookup(t).is_err()); + + assert!(lookup("bad_lookup").is_err()); + } + + #[test] + fn test_filter_empty() { + let mut pre = remove_empty::RemoveEmpty::default(); + assert_eq!(Ok(vec![]), pre.process(&mut 0_u64, &[], Value::object())); + assert_eq!(Ok(vec![]), pre.finish(None, None)); + } + + #[test] + fn test_filter_null() { + let mut pre = remove_empty::RemoveEmpty::default(); + assert_eq!(Ok(vec![]), pre.process(&mut 0_u64, &[], Value::object())); + assert_eq!(Ok(vec![]), pre.finish(None, None)); + } + + #[test] + fn test_lines() -> Result<()> { + let int = "snot\nbadger".as_bytes(); + let enc = "snot\nbadger\n".as_bytes(); // First event ( event per line ) + let out = "snot".as_bytes(); + + let mut post = SeparatePost::default(); + let mut pre = separate::Separate::default(); + + let mut ingest_ns = 0_u64; + let egress_ns = 1_u64; + + let r = post.process(ingest_ns, egress_ns, int); + assert!(r.is_ok(), "Expected Ok(...), Got: {r:?}"); + let ext = &r?[0]; + let ext = ext.as_slice(); + // Assert actual encoded form is as expected + assert_eq!(enc, ext); + + let r = pre.process(&mut ingest_ns, ext, Value::object()); + let out2 = &r?[0].0; + let out2 = out2.as_slice(); + // Assert actual decoded form is as expected + assert_eq!(out, out2); + + // assert empty finish, no leftovers + assert!(pre.finish(None, None)?.is_empty()); + Ok(()) + } + + #[test] + fn test_separate_buffered() -> Result<()> { + let input = "snot\nbadger\nwombat\ncapybara\nquagga".as_bytes(); + let mut pre = separate::Separate::new(b'\n', 1000, true); + let mut ingest_ns = 
0_u64; + let mut res = pre.process(&mut ingest_ns, input, Value::object())?; + let splitted = input + .split(|c| *c == b'\n') + .map(|v| (v.to_vec(), Value::object())) + .collect::>(); + assert_eq!(splitted[..splitted.len() - 1].to_vec(), res); + let mut finished = pre.finish(None, None)?; + res.append(&mut finished); + assert_eq!(splitted, res); + Ok(()) + } + + macro_rules! assert_separate_no_buffer { + ($inbound:expr, $outbound1:expr, $outbound2:expr, $case_number:expr, $separator:expr) => { + let mut ingest_ns = 0_u64; + let r = separate::Separate::new($separator, 0, false).process( + &mut ingest_ns, + $inbound, + Value::object(), + ); + + let out = &r?; + // Assert preprocessor output is as expected + assert!( + 2 == out.len(), + "Test case : {} => expected output = {}, actual output = {}", + $case_number, + "2", + out.len() + ); + assert!( + $outbound1 == out[0].0.as_slice(), + "Test case : {} => expected output = \"{}\", actual output = \"{}\"", + $case_number, + std::str::from_utf8($outbound1).unwrap(), + std::str::from_utf8(out[0].0.as_slice()).unwrap() + ); + assert!( + $outbound2 == out[1].0.as_slice(), + "Test case : {} => expected output = \"{}\", actual output = \"{}\"", + $case_number, + std::str::from_utf8($outbound2).unwrap(), + std::str::from_utf8(out[1].0.as_slice()).unwrap() + ); + }; + } + + #[allow(clippy::type_complexity)] + #[test] + fn test_separate_no_buffer_no_maxlength() -> Result<()> { + let test_data: [(&'static [u8], &'static [u8], &'static [u8], &'static str); 4] = [ + (b"snot\nbadger", b"snot", b"badger", "0"), + (b"snot\n", b"snot", b"", "1"), + (b"\nsnot", b"", b"snot", "2"), + (b"\n", b"", b"", "3"), + ]; + for case in &test_data { + assert_separate_no_buffer!(case.0, case.1, case.2, case.3, b'\n'); + } + + Ok(()) + } + + #[allow(clippy::type_complexity)] + #[test] + fn test_carriage_return_no_buffer_no_maxlength() -> Result<()> { + let test_data: [(&'static [u8], &'static [u8], &'static [u8], &'static str); 4] = [ + 
(b"snot\rbadger", b"snot", b"badger", "0"), + (b"snot\r", b"snot", b"", "1"), + (b"\rsnot", b"", b"snot", "2"), + (b"\r", b"", b"", "3"), + ]; + for case in &test_data { + assert_separate_no_buffer!(case.0, case.1, case.2, case.3, b'\r'); + } + + Ok(()) + } + + #[test] + fn test_base64() -> Result<()> { + let int = "snot badger".as_bytes(); + let enc = "c25vdCBiYWRnZXI=".as_bytes(); + + let mut pre = base64::Base64::default(); + let mut post = post::base64::Base64::default(); + + // Fake ingest_ns and egress_ns + let mut ingest_ns = 0_u64; + let egress_ns = 1_u64; + + let r = post.process(ingest_ns, egress_ns, int); + let ext = &r?[0]; + let ext = ext.as_slice(); + // Assert actual encoded form is as expected + assert_eq!(&enc, &ext); + + let r = pre.process(&mut ingest_ns, ext, Value::object()); + let out = &r?[0].0; + let out = out.as_slice(); + // Assert actual decoded form is as expected + assert_eq!(&int, &out); + + // assert empty finish, no leftovers + assert!(pre.finish(None, None)?.is_empty()); + Ok(()) + } + + struct BadPreprocessor {} + impl Preprocessor for BadPreprocessor { + fn name(&self) -> &'static str { + "chucky" + } + + fn process( + &mut self, + _ingest_ns: &mut u64, + _data: &[u8], + _meta: Value<'static>, + ) -> Result, Value<'static>)>> { + Err("chucky".into()) + } + fn finish( + &mut self, + _data: Option<&[u8]>, + _meta: Option>, + ) -> Result, Value<'static>)>> { + Ok(vec![]) + } + } + + struct BadFinisher {} + impl Preprocessor for BadFinisher { + fn name(&self) -> &'static str { + "chucky" + } + + fn process( + &mut self, + _ingest_ns: &mut u64, + _data: &[u8], + _meta: Value<'static>, + ) -> Result, Value<'static>)>> { + Ok(vec![]) + } + + fn finish( + &mut self, + _data: Option<&[u8]>, + _meta: Option>, + ) -> Result, Value<'static>)>> { + Err("chucky revenge".into()) + } + } + + struct NoOp {} + impl Preprocessor for NoOp { + fn name(&self) -> &'static str { + "nily" + } + + fn process( + &mut self, + _ingest_ns: &mut u64, + _data: 
&[u8], + meta: Value<'static>, + ) -> Result, Value<'static>)>> { + Ok(vec![(b"non".to_vec(), meta)]) + } + fn finish( + &mut self, + _data: Option<&[u8]>, + meta: Option>, + ) -> Result, Value<'static>)>> { + Ok(vec![(b"nein".to_vec(), meta.unwrap_or_else(Value::object))]) + } + } + + #[test] + fn badly_behaved_process() { + let mut pre = Box::new(BadPreprocessor {}); + assert_eq!("chucky", pre.name()); + + let mut ingest_ns = 0_u64; + let r = pre.process(&mut ingest_ns, b"foo", Value::object()); + assert!(r.is_err()); + + let r = pre.finish(Some(b"foo"), Some(Value::object())); + assert!(r.is_ok()); + } + + #[test] + fn badly_behaved_finish() { + let mut pre = Box::new(BadFinisher {}); + assert_eq!("chucky", pre.name()); + + let mut ingest_ns = 0_u64; + let r = pre.process(&mut ingest_ns, b"foo", Value::object()); + assert!(r.is_ok()); + + let r = pre.finish(Some(b"foo"), Some(Value::object())); + assert!(r.is_err()); + } + + #[test] + fn single_pre_process_head_ok() { + let pre = Box::new(BadPreprocessor {}); + let alias = tremor_common::alias::Connector::new( + tremor_common::alias::Flow::new("chucky"), + "chucky".to_string(), + ); + let mut ingest_ns = 0_u64; + let r = preprocess( + &mut [pre], + &mut ingest_ns, + b"foo".to_vec(), + Value::object(), + &alias, + ); + assert!(r.is_err()); + } + + #[test] + fn single_pre_process_tail_err() { + let noop = Box::new(NoOp {}); + assert_eq!("nily", noop.name()); + let pre = Box::new(BadPreprocessor {}); + let alias = tremor_common::alias::Connector::new( + tremor_common::alias::Flow::new("chucky"), + "chucky".to_string(), + ); + let mut ingest_ns = 0_u64; + let r = preprocess( + &mut [noop, pre], + &mut ingest_ns, + b"foo".to_vec(), + Value::object(), + &alias, + ); + assert!(r.is_err()); + } + + #[test] + fn single_pre_finish_ok() { + let pre = Box::new(BadPreprocessor {}); + let alias = tremor_common::alias::Connector::new( + tremor_common::alias::Flow::new("chucky"), + "chucky".to_string(), + ); + let r = 
finish(&mut [pre], &alias); + assert!(r.is_ok()); + } + + #[test] + fn direct_pre_finish_err() { + let mut pre = Box::new(BadFinisher {}); + let r = pre.finish(Some(b"foo"), Some(Value::object())); + assert!(r.is_err()); + } + + #[test] + fn preprocess_finish_head_fail() { + let alias = tremor_common::alias::Connector::new( + tremor_common::alias::Flow::new("chucky"), + "chucky".to_string(), + ); + let pre = Box::new(BadFinisher {}); + let r = finish(&mut [pre], &alias); + assert!(r.is_err()); + } + + #[test] + fn preprocess_finish_tail_fail() { + let alias = tremor_common::alias::Connector::new( + tremor_common::alias::Flow::new("chucky"), + "chucky".to_string(), + ); + let noop = Box::new(NoOp {}); + let pre = Box::new(BadFinisher {}); + let r = finish(&mut [noop, pre], &alias); + assert!(r.is_err()); + } + + #[test] + fn preprocess_finish_multi_ok() { + let alias = tremor_common::alias::Connector::new( + tremor_common::alias::Flow::new("xyz"), + "xyz".to_string(), + ); + let noop1 = Box::new(NoOp {}); + let noop2 = Box::new(NoOp {}); + let noop3 = Box::new(NoOp {}); + let r = finish(&mut [noop1, noop2, noop3], &alias); + assert!(r.is_ok()); + } +} diff --git a/src/preprocessor/base64.rs b/tremor-interceptor/src/preprocessor/base64.rs similarity index 94% rename from src/preprocessor/base64.rs rename to tremor-interceptor/src/preprocessor/base64.rs index 6d0ee47283..56fdc95737 100644 --- a/src/preprocessor/base64.rs +++ b/tremor-interceptor/src/preprocessor/base64.rs @@ -14,8 +14,7 @@ //! Decodes base64 encoded data to the raw bytes. 
use super::prelude::*; -use base64::Engine; -use tremor_common::base64::BASE64; +use tremor_common::base64::{Engine, BASE64}; #[derive(Clone, Default, Debug)] pub(crate) struct Base64 {} diff --git a/src/preprocessor/decompress.rs b/tremor-interceptor/src/preprocessor/decompress.rs similarity index 100% rename from src/preprocessor/decompress.rs rename to tremor-interceptor/src/preprocessor/decompress.rs diff --git a/src/preprocessor/gelf_chunking.rs b/tremor-interceptor/src/preprocessor/gelf_chunking.rs similarity index 98% rename from src/preprocessor/gelf_chunking.rs rename to tremor-interceptor/src/preprocessor/gelf_chunking.rs index a5461eb18c..ef653f9567 100644 --- a/src/preprocessor/gelf_chunking.rs +++ b/tremor-interceptor/src/preprocessor/gelf_chunking.rs @@ -53,9 +53,10 @@ //! ``` use super::prelude::*; -use crate::errors::{Kind as ErrorKind, Result}; -use hashbrown::{hash_map::Entry, HashMap}; +use crate::errors::{ErrorKind, Result}; +use log::{error, warn}; use rand::{self, RngCore}; +use std::collections::{hash_map::Entry, HashMap}; const FIVE_SEC: u64 = 5_000_000_000; @@ -264,7 +265,7 @@ fn assemble(key: u64, m: GelfMsgs) -> Option> { #[cfg(test)] mod test { use super::*; - use crate::Result; + use crate::errors::Result; #[test] fn gelf_chunking_default() -> Result<()> { diff --git a/src/preprocessor/ingest_ns.rs b/tremor-interceptor/src/preprocessor/ingest_ns.rs similarity index 100% rename from src/preprocessor/ingest_ns.rs rename to tremor-interceptor/src/preprocessor/ingest_ns.rs diff --git a/src/preprocessor/length_prefixed.rs b/tremor-interceptor/src/preprocessor/length_prefixed.rs similarity index 100% rename from src/preprocessor/length_prefixed.rs rename to tremor-interceptor/src/preprocessor/length_prefixed.rs diff --git a/src/preprocessor/remove_empty.rs b/tremor-interceptor/src/preprocessor/remove_empty.rs similarity index 97% rename from src/preprocessor/remove_empty.rs rename to tremor-interceptor/src/preprocessor/remove_empty.rs index 
e21b56996a..6f51004f8b 100644 --- a/src/preprocessor/remove_empty.rs +++ b/tremor-interceptor/src/preprocessor/remove_empty.rs @@ -15,7 +15,7 @@ //! Removes empty messages (aka zero len). This one is best used in a chain after a splitting preprocessor, like [`separate`](./separate.md) use super::prelude::*; -use crate::Result; +use crate::errors::Result; #[derive(Default, Debug, Clone)] pub(crate) struct RemoveEmpty {} diff --git a/src/preprocessor/separate.rs b/tremor-interceptor/src/preprocessor/separate.rs similarity index 99% rename from src/preprocessor/separate.rs rename to tremor-interceptor/src/preprocessor/separate.rs index 1510f72a5b..10f4ff1180 100644 --- a/src/preprocessor/separate.rs +++ b/tremor-interceptor/src/preprocessor/separate.rs @@ -59,14 +59,14 @@ //! ``` use super::prelude::*; -use crate::{ - connectors::prelude::*, - errors::{Kind as ErrorKind, Result}, -}; +use crate::errors::{ErrorKind, Result}; +use log::trace; use memchr::memchr_iter; +use serde::{Deserialize, Serialize}; use std::num::NonZeroUsize; -use tremor_pipeline::{ConfigImpl, ConfigMap}; +use tremor_config::{Impl as ConfigImpl, Map as ConfigMap}; +pub(crate) const DEFAULT_BUF_SIZE: usize = 8 * 1024; pub(crate) const DEFAULT_SEPARATOR: u8 = b'\n'; const INITIAL_PARTS_PER_CHUNK: usize = 64; @@ -85,7 +85,7 @@ pub(crate) fn default_separator() -> String { String::from_utf8_lossy(&[DEFAULT_SEPARATOR]).into_owned() } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} #[derive(Clone)] pub struct Separate { @@ -314,7 +314,7 @@ mod test { use tremor_value::literal; use super::*; - use crate::Result; + use crate::errors::Result; #[test] fn from_config() -> Result<()> { diff --git a/src/preprocessor/textual_length_prefixed.rs b/tremor-interceptor/src/preprocessor/textual_length_prefixed.rs similarity index 98% rename from src/preprocessor/textual_length_prefixed.rs rename to tremor-interceptor/src/preprocessor/textual_length_prefixed.rs index f40e926806..238abe318d 
100644 --- a/src/preprocessor/textual_length_prefixed.rs +++ b/tremor-interceptor/src/preprocessor/textual_length_prefixed.rs @@ -15,7 +15,7 @@ //! Extracts the message based on prefixed message length given in ascii digits which is followed by a space as used in [RFC 5425](https://tools.ietf.org/html/rfc5425#section-4.3) for TLS/TCP transport for syslog use super::prelude::*; -use crate::Result; +use crate::errors::Result; use bytes::{Buf, BytesMut}; #[derive(Clone, Default, Debug)] diff --git a/tremor-pipeline/Cargo.toml b/tremor-pipeline/Cargo.toml index 276b76b9f3..79277442af 100644 --- a/tremor-pipeline/Cargo.toml +++ b/tremor-pipeline/Cargo.toml @@ -26,6 +26,7 @@ simd-json = { version = "0.11", features = ["known-key"] } simd-json-derive = "0.11" sled = "0.34" tremor-common = { version = "0.13.0-rc.16", path = "../tremor-common" } +tremor-config = { version = "0.13.0-rc.16", path = "../tremor-config" } tremor-script = { version = "0.13.0-rc.16", path = "../tremor-script" } tremor-value = { version = "0.13.0-rc.16", path = "../tremor-value" } url = "2" @@ -33,7 +34,6 @@ value-trait = "0.6" window = { git = "https://github.com/tremor-rs/window.git", tag = "v0.1.1" } [dev-dependencies] -base64 = "0.21" criterion = "0.5" tempfile = "3.8" xz2 = "0.1" diff --git a/tremor-pipeline/src/executable_graph.rs b/tremor-pipeline/src/executable_graph.rs index 6a0d6ef48f..64cca0160e 100644 --- a/tremor-pipeline/src/executable_graph.rs +++ b/tremor-pipeline/src/executable_graph.rs @@ -17,13 +17,14 @@ use crate::{ errors::Result, errors::{Error, ErrorKind}, metrics::value_count, - ConfigMap, ExecPortIndexMap, MetricsMsg, MetricsSender, NodeLookupFn, + ExecPortIndexMap, MetricsMsg, MetricsSender, NodeLookupFn, }; use crate::{op::EventAndInsights, Event, NodeKind, Operator}; use halfbrown::HashMap; use simd_json::ObjectHasher; use std::{fmt, fmt::Display}; use tremor_common::{ids::OperatorId, ports::Port, stry}; +use tremor_config::Map as ConfigMap; use 
tremor_script::{ast::Helper, ast::Stmt}; use tremor_value::{Object, Value}; diff --git a/tremor-pipeline/src/lib.rs b/tremor-pipeline/src/lib.rs index 94a9f1b19b..b7b7f47e0d 100644 --- a/tremor-pipeline/src/lib.rs +++ b/tremor-pipeline/src/lib.rs @@ -72,13 +72,10 @@ pub mod query; pub use crate::event::{Event, ValueIter, ValueMetaIter}; pub use crate::executable_graph::{ExecutableGraph, OperatorNode}; pub(crate) use crate::executable_graph::{NodeMetrics, State}; -pub use op::{ConfigImpl, InitializableOperator, Operator}; +pub use op::{InitializableOperator, Operator}; pub use tremor_script::prelude::EventOriginUri; pub(crate) type ExecPortIndexMap = HashMap<(usize, Port<'static>), Vec<(usize, Port<'static>)>>; -/// A configuration map -pub type ConfigMap = Option>; - /// A lookup function to used to look up operators pub type NodeLookupFn = fn( config: &NodeConfig, diff --git a/tremor-pipeline/src/op.rs b/tremor-pipeline/src/op.rs index 0a8119de26..37251f272b 100644 --- a/tremor-pipeline/src/op.rs +++ b/tremor-pipeline/src/op.rs @@ -150,18 +150,3 @@ pub trait InitializableOperator { //// if no operator con be instanciated from the provided NodeConfig fn node_to_operator(&self, uid: OperatorId, node: &NodeConfig) -> Result>; } - -/// Trait for detecting errors in config and the key names are included in errors -pub trait ConfigImpl { - /// deserialises the config into a struct and returns nice errors - /// this doesn't need to be overwritten in most cases. - /// - /// # Errors - /// if the Configuration is invalid - fn new(config: &tremor_value::Value) -> Result - where - Self: serde::de::Deserialize<'static>, - { - Ok(tremor_value::structurize(config.clone_static())?) 
- } -} diff --git a/tremor-pipeline/src/op/bert/sequence_classification.rs b/tremor-pipeline/src/op/bert/sequence_classification.rs index f2aada6678..0d05f5dec9 100644 --- a/tremor-pipeline/src/op/bert/sequence_classification.rs +++ b/tremor-pipeline/src/op/bert/sequence_classification.rs @@ -52,7 +52,7 @@ fn dflt_vocabulary() -> String { } type Resource = Box; -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} struct SequenceClassification { model: Mutex, diff --git a/tremor-pipeline/src/op/bert/summarization.rs b/tremor-pipeline/src/op/bert/summarization.rs index 6eb5440eff..05b65c7e9d 100644 --- a/tremor-pipeline/src/op/bert/summarization.rs +++ b/tremor-pipeline/src/op/bert/summarization.rs @@ -30,7 +30,7 @@ struct Config { file: String, // just a stupid placeholder } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} struct Summerization { model: Mutex, diff --git a/tremor-pipeline/src/op/debug/history.rs b/tremor-pipeline/src/op/debug/history.rs index 8c0b3ad3ba..00d6c44862 100644 --- a/tremor-pipeline/src/op/debug/history.rs +++ b/tremor-pipeline/src/op/debug/history.rs @@ -44,7 +44,6 @@ //! 
``` use crate::op::prelude::*; -use crate::ConfigImpl; use tremor_script::prelude::*; #[derive(Debug, Clone, Deserialize)] @@ -56,7 +55,7 @@ pub struct Config { pub name: String, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} op!(EventHistoryFactory(_uid, node) { if let Some(map) = &node.config { diff --git a/tremor-pipeline/src/op/generic/batch.rs b/tremor-pipeline/src/op/generic/batch.rs index 89069888b8..eb03a50fde 100644 --- a/tremor-pipeline/src/op/generic/batch.rs +++ b/tremor-pipeline/src/op/generic/batch.rs @@ -53,7 +53,7 @@ pub struct Config { pub timeout: Option, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} #[derive(Debug, Clone)] struct Batch { diff --git a/tremor-pipeline/src/op/prelude.rs b/tremor-pipeline/src/op/prelude.rs index d07823f6f8..76051d059c 100644 --- a/tremor-pipeline/src/op/prelude.rs +++ b/tremor-pipeline/src/op/prelude.rs @@ -20,4 +20,5 @@ pub use halfbrown::{hashmap, HashMap}; pub use serde_yaml; pub use simd_json::OwnedValue; pub use tremor_common::ports::{Port, ERR, IN, OUT, OVERFLOW}; +pub use tremor_config::Impl as ConfigImpl; pub use value_trait::Value as ValueTrait; diff --git a/tremor-pipeline/src/op/qos/backpressure.rs b/tremor-pipeline/src/op/qos/backpressure.rs index a8a8a868bd..9d037cf0ae 100644 --- a/tremor-pipeline/src/op/qos/backpressure.rs +++ b/tremor-pipeline/src/op/qos/backpressure.rs @@ -80,7 +80,7 @@ pub struct Config { pub method: Method, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} fn default_steps() -> Vec { vec![50, 100, 250, 500, 1000, 5000, 10000] diff --git a/tremor-pipeline/src/op/qos/percentile.rs b/tremor-pipeline/src/op/qos/percentile.rs index f0a89c0e5f..1276ac89e9 100644 --- a/tremor-pipeline/src/op/qos/percentile.rs +++ b/tremor-pipeline/src/op/qos/percentile.rs @@ -68,7 +68,7 @@ pub struct Config { pub step_up: f64, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} fn default_step_up() -> f64 { 
0.001 diff --git a/tremor-pipeline/src/op/qos/roundrobin.rs b/tremor-pipeline/src/op/qos/roundrobin.rs index ff690d97e1..eef0d620c7 100644 --- a/tremor-pipeline/src/op/qos/roundrobin.rs +++ b/tremor-pipeline/src/op/qos/roundrobin.rs @@ -43,7 +43,7 @@ pub struct Config { pub outputs: Vec, } -impl ConfigImpl for Config {} +impl tremor_config::Impl for Config {} fn default_outputs() -> Vec { vec![OUT.to_string()] diff --git a/tremor-script/Cargo.toml b/tremor-script/Cargo.toml index 5d468d4223..f667187412 100644 --- a/tremor-script/Cargo.toml +++ b/tremor-script/Cargo.toml @@ -22,7 +22,6 @@ name = "tremor_script" [dependencies] atty = "0.2" -base64 = "0.21" beef = { version = "0.5", features = ["impl_serde"] } byteorder = "1" chrono = "0.4" diff --git a/tremor-script/src/ast/deploy.rs b/tremor-script/src/ast/deploy.rs index 2e12cb9288..8352c43b12 100644 --- a/tremor-script/src/ast/deploy.rs +++ b/tremor-script/src/ast/deploy.rs @@ -15,7 +15,7 @@ // We want to keep the names here #![allow(clippy::module_name_repetitions)] -use tremor_common::ports::Port; +use tremor_common::{alias, ports::Port}; use super::{ docs::Docs, helper::Scope, node_id::BaseRef, raw::BaseExpr, CreationalWith, DefinitionalArgs, @@ -303,6 +303,12 @@ impl crate::ast::node_id::BaseRef for DeployFlow<'_> { } } +impl From<&DeployFlow<'_>> for alias::Flow { + fn from(val: &DeployFlow<'_>) -> Self { + alias::Flow::from(val.instance_alias.clone()) + } +} + #[cfg(test)] mod test { use super::*; diff --git a/tremor-script/src/extractor.rs b/tremor-script/src/extractor.rs index 2f077d36c8..6b91df0b58 100644 --- a/tremor-script/src/extractor.rs +++ b/tremor-script/src/extractor.rs @@ -36,10 +36,9 @@ mod re; use crate::{grok::PATTERNS_FILE_DEFAULT_PATH, prelude::*}; use crate::{EventContext, Value}; -use ::base64::Engine; use re::Regex; use std::{fmt, iter::Iterator, result::Result as StdResult}; -use tremor_common::base64::BASE64; +use tremor_common::base64::{Engine, BASE64}; use self::cidr::SnotCombiner; 
diff --git a/tremor-script/src/extractor/base64.rs b/tremor-script/src/extractor/base64.rs index 1cf45afe4c..bed87129e4 100644 --- a/tremor-script/src/extractor/base64.rs +++ b/tremor-script/src/extractor/base64.rs @@ -31,8 +31,7 @@ //! ## Output: 🌊 snot badger //! ``` use super::{Error, Result, StdResult}; -use base64::Engine; -use tremor_common::base64::BASE64; +use tremor_common::base64::{Engine, BASE64}; use tremor_value::Value; pub(crate) fn execute(s: &str, result_needed: bool) -> Result<'static> { diff --git a/tremor-script/src/std_lib/base64.rs b/tremor-script/src/std_lib/base64.rs index c63a384999..7048563bea 100644 --- a/tremor-script/src/std_lib/base64.rs +++ b/tremor-script/src/std_lib/base64.rs @@ -14,8 +14,7 @@ use crate::registry::Registry; use crate::{tremor_const_fn, tremor_fn_}; -use base64::Engine; -use tremor_common::base64::BASE64; +use tremor_common::base64::{Engine, BASE64}; pub fn load(registry: &mut Registry) { registry diff --git a/tremor-value/src/macros.rs b/tremor-value/src/macros.rs index c2729b894b..ce5be9cf8a 100644 --- a/tremor-value/src/macros.rs +++ b/tremor-value/src/macros.rs @@ -192,7 +192,7 @@ macro_rules! literal_internal { // Done. Insert all entries from the stack (@object $object:ident [@entries $(($value:expr => $($key:tt)+))*] () () ()) => { let len = literal_internal!(@object @count [@entries $(($value => $($key)+))*]); - $object = $crate::Object::with_capacity_and_hasher(len, simd_json::value::ObjectHasher::default()); + $object = $crate::Object::with_capacity_and_hasher(len, ::simd_json::value::ObjectHasher::default()); $( // ALLOW: this is a macro, we don't care about the return value $object.insert(($($key)+).into(), $value); diff --git a/tremor-value/src/value/cmp.rs b/tremor-value/src/value/cmp.rs index 92b380958d..c85e245a8c 100644 --- a/tremor-value/src/value/cmp.rs +++ b/tremor-value/src/value/cmp.rs @@ -13,9 +13,8 @@ // limitations under the License. 
use super::Value; -use base64::Engine; use simd_json::{prelude::*, BorrowedValue, OwnedValue}; -use tremor_common::base64::BASE64; +use tremor_common::base64::{Engine, BASE64}; #[allow(clippy::cast_sign_loss, clippy::default_trait_access)] impl<'value> PartialEq for Value<'value> { From ed24a4e2e0d33f539e3acd03e3ff013b903c1556 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Thu, 12 Oct 2023 17:59:31 +0200 Subject: [PATCH 5/9] Remove avro kafka schema registry leftovers Signed-off-by: Heinz N. Gies --- tremor-codec/src/codec/avro.rs | 54 +++------------------------------- 1 file changed, 4 insertions(+), 50 deletions(-) diff --git a/tremor-codec/src/codec/avro.rs b/tremor-codec/src/codec/avro.rs index fbdab2154c..59a610d70c 100644 --- a/tremor-codec/src/codec/avro.rs +++ b/tremor-codec/src/codec/avro.rs @@ -58,35 +58,6 @@ const AVRO_BUFFER_CAP: usize = 512; #[derive(Clone, Debug, Default)] struct AvroRegistry { by_name: HashMap, - by_id: HashMap, - registry_url: Option, -} - -impl AvroRegistry { - fn get_schema_by_id(&self, id: u32) -> Option<&Schema> { - self.by_id.get(&id) - } - - async fn maybe_fetch_id(&mut self, id: u32) -> Result<()> { - if self.by_id.contains_key(&id) { - return Ok(()); - } - if let Some(url) = self.registry_url.as_ref() { - let schema = String::from_utf8( - reqwest::get(&format!("{url}/schemas/ids/{id}")) - .await? - .bytes() - .await? 
- .to_vec(), - )?; - let schema = Schema::parse_str(&schema)?; - if let Some(name) = schema.name().cloned() { - self.by_name.insert(name, schema.clone()); - } - self.by_id.insert(id, schema); - } - Ok(()) - } } #[derive(Clone, Debug)] @@ -108,10 +79,7 @@ impl Avro { Some(c) => return Err(format!("Unknown compression codec: {c}").into()), }; - let mut registry = AvroRegistry { - registry_url: config.get_str("registry").map(ToString::to_string), - ..AvroRegistry::default() - }; + let mut registry = AvroRegistry::default(); match config.get("schema") { Some(schema) => { @@ -356,14 +324,7 @@ impl Codec for Avro { _ingest_ns: u64, meta: Value<'input>, ) -> Result, Value<'input>)>> { - let schema = if let Some(schema_id) = meta.get_u32("schema_id") { - self.registry.maybe_fetch_id(schema_id).await?; - self.registry - .get_schema_by_id(schema_id) - .ok_or_else(|| format!("No schema found for id {schema_id} in registry"))? - } else { - &self.schema - }; + let schema = &self.schema; let reader = Reader::with_schema(schema, &*data)?; @@ -371,15 +332,8 @@ impl Codec for Avro { vals.next().map(|v| v.map(|v| (v, meta))).transpose() } - async fn encode(&mut self, data: &Value, meta: &Value) -> Result> { - let schema = if let Some(schema_id) = meta.get_u32("schema_id") { - self.registry.maybe_fetch_id(schema_id).await?; - self.registry - .get_schema_by_id(schema_id) - .ok_or_else(|| format!("No schema found for id {schema_id} in registry"))? - } else { - &self.schema - }; + async fn encode(&mut self, data: &Value, _meta: &Value) -> Result> { + let schema = &self.schema; let mut writer = Writer::with_codec( schema, Vec::with_capacity(AVRO_BUFFER_CAP), From 8a6f71418dc90a5f6e44bc029506077881530665 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Thu, 12 Oct 2023 18:01:43 +0200 Subject: [PATCH 6/9] Add changelog and cleanup Signed-off-by: Heinz N. 
Gies --- CHANGELOG.md | 7 ++++ Cargo.lock | 7 ++-- Cargo.toml | 9 +---- docs/Makefile | 4 +- src/connectors/tests/http/client.rs | 2 +- src/connectors/tests/ws.rs | 3 +- src/lib.rs | 4 -- tremor-cli/Cargo.toml | 4 -- tremor-codec/Cargo.toml | 2 - tremor-codec/src/codec/avro.rs | 35 ++++------------- .../src/codec/kafka_schema_registry.rs | 34 ++++++++++++++++- tremor-common/Cargo.toml | 2 +- tremor-config/src/lib.rs | 38 +++++++++++++++++++ tremor-pipeline/Cargo.toml | 2 +- tremor-script-nif/Cargo.toml | 2 +- 15 files changed, 97 insertions(+), 58 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e33a0ef743..eb58afa604 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ ## Unreleased +### New features +* kafka schema registry codec + +### Breaking Changes +* remove schema_registry preprocessor +* remove defunct schema_registry support for avro codec + ## [0.13.0-rc.16] ### New features diff --git a/Cargo.lock b/Cargo.lock index 49f8efedac..5fbdcdc047 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3230,9 +3230,9 @@ dependencies = [ [[package]] name = "lru" -version = "0.11.1" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a83fb7698b3643a0e34f9ae6f2e8f0178c0fd42f8b59d493aa271ff3a5bf21" +checksum = "1efa59af2ddfad1854ae27d75009d538d0998b4b2fd47083e743ac1a10e46c60" dependencies = [ "hashbrown 0.14.1", ] @@ -4658,7 +4658,7 @@ dependencies = [ [[package]] name = "schema_registry_converter" version = "3.1.0" -source = "git+https://github.com/tremor-rs/schema_registry_converter.git?branch=housekeeping#9e88b5845cd1f0142786815d23844f3c4fa7820a" +source = "git+https://github.com/tremor-rs/schema_registry_converter.git?branch=housekeeping#8df31e3e250ead445349432a5b5803d0c9985f2d" dependencies = [ "apache-avro", "byteorder", @@ -6257,6 +6257,7 @@ dependencies = [ "base64 0.21.4", "beef", "bimap", + "bytes", "chrono", "chrono-tz", "clickhouse-rs", diff --git a/Cargo.toml b/Cargo.toml index 
d29346089e..a1e43791e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -141,12 +141,6 @@ cron = "0.12" # logstash grok patterns grok = "2" -# sse-onramp -#surf-sse = { git = "https://github.com/dak-x/surf-sse", tag = "2.0", default-features = false } - -# nats -#async-nats = "0.10.1" - # discord serenity = { version = "0.11", default-features = false, features = [ "client", @@ -203,7 +197,6 @@ simdutf8 = "0.1" [dev-dependencies] port_scanner = "0.1" serial_test = { version = "2.0", features = ["logging"] } -# path = "../serial_test/serial_test" env_logger = "0.10" matches = "0.1" pretty_assertions = "1.4" @@ -217,7 +210,7 @@ tempfile = { version = "3.8" } test-case = "3.1" testcontainers = { version = "0.14", features = ["watchdog"] } num_cpus = "1" - +bytes = "1" [features] default = [] diff --git a/docs/Makefile b/docs/Makefile index 5b60552e7c..87909f27a0 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -4,10 +4,10 @@ operator-docs: scripts/operators.sh preprocessor-docs: - scripts/gen.sh src/preprocessor preprocessors + scripts/gen.sh tremor-interceptor/src/preprocessor preprocessors postprocessor-docs: - scripts/gen.sh src/postprocessor postprocessors + scripts/gen.sh tremor-interceptor/src/postprocessor postprocessors codec-docs: scripts/gen.sh tremor-codec/src/codec codecs diff --git a/src/connectors/tests/http/client.rs b/src/connectors/tests/http/client.rs index f20ad29a1b..7450290f66 100644 --- a/src/connectors/tests/http/client.rs +++ b/src/connectors/tests/http/client.rs @@ -17,7 +17,6 @@ use crate::{ impls::http::{self as http_impl, meta::content_type}, prelude::Url, tests::{free_port::find_free_tcp_port, ConnectorHarness}, - utils::url::HttpDefaults, }, errors::Result, }; @@ -34,6 +33,7 @@ use std::{ }; use tokio::task::{spawn, JoinHandle}; use tremor_common::ports::IN; +use tremor_common::url::HttpDefaults; use tremor_pipeline::Event; use tremor_script::ValueAndMeta; use tremor_value::{literal, Value}; diff --git a/src/connectors/tests/ws.rs 
b/src/connectors/tests/ws.rs index 72a03c29e8..1efb4399db 100644 --- a/src/connectors/tests/ws.rs +++ b/src/connectors/tests/ws.rs @@ -14,8 +14,8 @@ use super::{free_port::find_free_tcp_port, setup_for_tls, ConnectorHarness}; use crate::channel::{bounded, Receiver, Sender, TryRecvError}; +use crate::connectors::impls::ws::WsDefaults; use crate::connectors::{impls::ws, utils::tls::TLSClientConfig}; -use crate::connectors::{impls::ws::WsDefaults, utils::url::Url}; use crate::errors::{Result, ResultExt}; use futures::SinkExt; use futures::StreamExt; @@ -46,6 +46,7 @@ use tokio_tungstenite::{ WebSocketStream, }; use tremor_common::ports::IN; +use tremor_common::url::Url; use tremor_pipeline::{Event, EventId}; use tremor_value::{literal, prelude::*, Value}; diff --git a/src/lib.rs b/src/lib.rs index 74049bd278..f1a1eb7964 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,10 +24,6 @@ clippy::pedantic, clippy::mod_module_files )] -// TODO this is needed due to a false positive in clippy -// https://github.com/rust-lang/rust/issues/83125 -// we will need this in 1.53.1 -#![allow(proc_macro_back_compat)] #[macro_use] extern crate serde; diff --git a/tremor-cli/Cargo.toml b/tremor-cli/Cargo.toml index 8cdafb8111..37409bdc49 100644 --- a/tremor-cli/Cargo.toml +++ b/tremor-cli/Cargo.toml @@ -22,7 +22,6 @@ pretty_assertions = "1.4" [dependencies] tokio = { version = "1.32", features = ["full"] } -# tokio-metrics = { version = "0.1.0", default-features = true } anyhow = "1" clap = { version = "4", features = ["color", "derive"] } clap_complete = "4" @@ -31,7 +30,6 @@ env_logger = "0.10" futures = "0.3" halfbrown = "0.2" http-types = "2" -# jemallocator = {version = "0.3", optional = false} log = "0.4" log4rs = "1" serde = "1" @@ -56,8 +54,6 @@ tremor-runtime = { version = "0.13.0-rc.16", path = "../" } tremor-script = { version = "0.13.0-rc.16", path = "../tremor-script" } tremor-value = { version = "0.13.0-rc.16", path = "../tremor-value" } url = "2" -# mimalloc-rs = { version = 
"0.1", default-features = true, optional = true } -# allocator_api = "0.6.0" error-chain = "0.12" globwalk = "0.8" port_scanner = "0.1" diff --git a/tremor-codec/Cargo.toml b/tremor-codec/Cargo.toml index d4f22cac81..fc99b61921 100644 --- a/tremor-codec/Cargo.toml +++ b/tremor-codec/Cargo.toml @@ -39,8 +39,6 @@ schema_registry_converter = { version = "3", default-features = false, features "easy", ], git = "https://github.com/tremor-rs/schema_registry_converter.git", branch = "housekeeping" } -# path = "schema_registry_converter" - # codecs reqwest = { version = "0.11", default-features = false, features = [ "rustls-tls", diff --git a/tremor-codec/src/codec/avro.rs b/tremor-codec/src/codec/avro.rs index 59a610d70c..e82745357e 100644 --- a/tremor-codec/src/codec/avro.rs +++ b/tremor-codec/src/codec/avro.rs @@ -16,6 +16,13 @@ //! //! The codec is configured with a codec following the avro json codec specification //! +//! ## Configuration +//! +//! | value | optional | description | +//! |-------|----------|-------------| +//! | `schema` | no | The avro schema to use | +//! | `compression` | yes | The compression codec to use, one of `deflate`, `snappy`, `zstd`, `bzip2`, `xz`, `none` | +//! //! ## Mappings //! //! 
| avro | tremor (to) | tremor (from) | @@ -670,34 +677,6 @@ mod test { Ok(()) } - #[tokio::test(flavor = "multi_thread")] - async fn decode_smaple() -> Result<()> { - // [b'O', b'b', b'j', 1u8] - let from_kafka = vec![0_u8, 0, 0, 0, 1, 12, 115, 116, 114, 105, 110, 103]; - // let from_kafka = vec![b'O', b'b', b'j', 1_u8, 12, 115, 116, 114, 105, 110, 103]; - // let mut from_kafka = vec![12, 115, 116, 114, 105, 110, 103_u8]; - - let mut codec = test_codec(literal!( - { - "type": "record", - "name": "record", - "fields": [ - {"name": "one", "type": "string"}, - ] - } - ))?; - - let decoded = literal!({"one": "string"}); - - let mut encoded = codec.encode(&decoded, &Value::const_null()).await?; - assert_eq!(encoded, from_kafka); - - codec - .decode(&mut encoded, 0, Value::object()) - .await? - .expect("no data"); - Ok(()) - } #[tokio::test(flavor = "multi_thread")] async fn round_robin() -> Result<()> { let mut codec = test_codec(literal!( diff --git a/tremor-codec/src/codec/kafka_schema_registry.rs b/tremor-codec/src/codec/kafka_schema_registry.rs index 4de5aa7633..8fb5df20e0 100644 --- a/tremor-codec/src/codec/kafka_schema_registry.rs +++ b/tremor-codec/src/codec/kafka_schema_registry.rs @@ -16,9 +16,13 @@ //! //! The codec is configured with a codec following the avro json codec specification //! +//! ## Configuration +//! +//! - `url`: the `url` configuration is used to point to the root of the schema registry server +//! //! ## Mappings //! -//! The same as the [`avro` codec](../avro) +//! 
The same as the [`avro` codec](./avro) use crate::{ avro::{avro_to_value, value_to_avro, SchemaResover, SchemaWrapper}, @@ -101,7 +105,7 @@ impl SchemaResover for RecordResolver<'_> { #[async_trait::async_trait()] impl Codec for Ksr { fn name(&self) -> &str { - todo!() + "kafka-schema-registry" } async fn decode<'input>( @@ -148,3 +152,29 @@ impl Codec for Ksr { Box::new(self.clone()) } } + +#[cfg(test)] +mod test { + use super::*; + + // Test if the codec can be created from config + #[test] + fn test_codec_creation() { + let config = literal!({"url":"http://localhost:8081"}); + let codec = Ksr::from_config(Some(&config)).expect("invalid config"); + assert_eq!(codec.name(), "kafka-schema-registry"); + } + + #[test] + fn invalid_config() { + let config = literal!({}); + let codec = Ksr::from_config(Some(&config)); + assert!(codec.is_err()); + } + #[test] + fn invalid_url() { + let config = literal!({"url":"loc alhost:8081"}); + let codec = Ksr::from_config(Some(&config)); + assert!(codec.is_err()); + } +} diff --git a/tremor-common/Cargo.toml b/tremor-common/Cargo.toml index 9184f005dc..2f6061fc45 100644 --- a/tremor-common/Cargo.toml +++ b/tremor-common/Cargo.toml @@ -13,7 +13,7 @@ tokio = { version = "1", features = ["full"] } rand = { version = "0.8", features = ["small_rng"] } beef = { version = "0.5", features = ["impl_serde"] } serde = "1" -url = "2" +url = { version = "2", features = ["serde"] } simd-json = { version = "0.11", features = ["known-key"] } simd-json-derive = "0.11" base64 = "0.21" diff --git a/tremor-config/src/lib.rs b/tremor-config/src/lib.rs index 92c3e0c2db..ca3f552136 100644 --- a/tremor-config/src/lib.rs +++ b/tremor-config/src/lib.rs @@ -12,6 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. +//! 
Tremor shared configuration + +#![deny(warnings)] +#![deny(missing_docs)] +#![recursion_limit = "1024"] +#![deny( + clippy::all, + clippy::unwrap_used, + clippy::unnecessary_unwrap, + clippy::pedantic, + clippy::mod_module_files +)] + use serde::Deserialize; use tremor_value::prelude::*; @@ -78,6 +91,7 @@ impl<'v> TryFrom<&Value<'v>> for NameWithConfig { /// Error for confdig #[derive(Debug, Clone, PartialEq)] pub enum Error { + /// malformed configuration InvalidConfig(String), } @@ -180,4 +194,28 @@ mod test { assert_eq!(nac.mime_mapping.map(|h| h.len()).unwrap_or_default(), 3); } + + // Test if the invalid configs give errors + #[test] + fn name_with_config_invalid() { + let v = literal!({"name": "json", "config": {"mode": "sorted"}}); + let nac = NameWithConfig::try_from(&v).expect("could structurize two element struct"); + assert_eq!(nac.name, "json"); + assert!(nac.config.as_object().is_some()); + let v = literal!({"name": "yaml"}); + let nac = NameWithConfig::try_from(&v).expect("could structurize one element struct"); + assert_eq!(nac.name, "yaml"); + assert_eq!(nac.config, None); + let v = literal!("name"); + let nac = NameWithConfig::try_from(&v).expect("could structurize string"); + assert_eq!(nac.name, "name"); + assert_eq!(nac.config, None); + let v = literal!({"snot": "yaml"}); + let nac = NameWithConfig::try_from(&v); + assert!(nac.is_err()); + assert_eq!( + nac.err().map(|e| e.to_string()).expect("err"), + r#"Invalid config: {"snot":"yaml"}"# + ); + } } diff --git a/tremor-pipeline/Cargo.toml b/tremor-pipeline/Cargo.toml index 79277442af..20e16ae39a 100644 --- a/tremor-pipeline/Cargo.toml +++ b/tremor-pipeline/Cargo.toml @@ -16,7 +16,7 @@ indexmap = "2" rand = { version = "0.8", features = ["small_rng"] } lazy_static = "1" log = "0.4" -lru = "0.11" +lru = "0.12" petgraph = "0.6" regex = "1" rust-bert = { version = "0.21.0", optional = true } diff --git a/tremor-script-nif/Cargo.toml b/tremor-script-nif/Cargo.toml index 6181ec5083..2e2f00d20e 100644
--- a/tremor-script-nif/Cargo.toml +++ b/tremor-script-nif/Cargo.toml @@ -16,7 +16,7 @@ name = "tremor" crate-type = ["dylib"] [dependencies] -rustler = "0.29" +rustler = "0.30" tremor-script = { path = "../tremor-script" } [features] From aedc854fb9845185bb909ea68f37418cdeddd67e Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Mon, 16 Oct 2023 09:33:29 +0200 Subject: [PATCH 7/9] Apply suggestions from code review Co-authored-by: Matthias Wahl Signed-off-by: Heinz N. Gies --- tremor-codec/src/codec/avro.rs | 6 +++--- tremor-codec/src/codec/kafka_schema_registry.rs | 4 ++-- tremor-codec/src/lib.rs | 2 +- tremor-interceptor/Cargo.toml | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tremor-codec/src/codec/avro.rs b/tremor-codec/src/codec/avro.rs index e82745357e..8412bce654 100644 --- a/tremor-codec/src/codec/avro.rs +++ b/tremor-codec/src/codec/avro.rs @@ -130,12 +130,12 @@ impl<'a> SchemaWrapper<'a> { } } #[async_trait::async_trait] -pub(crate) trait SchemaResover { +pub(crate) trait SchemaResolver { async fn by_name(&self, name: &Name) -> Option; } #[async_trait::async_trait] -impl SchemaResover for AvroRegistry { +impl SchemaResolver for AvroRegistry { async fn by_name(&self, name: &Name) -> Option { self.by_name.get(name).map(SchemaWrapper::Ref) } @@ -149,7 +149,7 @@ pub(crate) async fn value_to_avro<'v, R>( resolver: &R, ) -> Result where - R: SchemaResover + Sync, + R: SchemaResolver + Sync, { Ok(match schema { Schema::Null => { diff --git a/tremor-codec/src/codec/kafka_schema_registry.rs b/tremor-codec/src/codec/kafka_schema_registry.rs index 8fb5df20e0..797ca092c5 100644 --- a/tremor-codec/src/codec/kafka_schema_registry.rs +++ b/tremor-codec/src/codec/kafka_schema_registry.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! The `avro` codec supports Apache Avro binary encoding. +//! The `kafka-schema-registry` codec supports Apache Avro binary encoding. 
//! //! The codec is configured with a codec following the avro json codec specification //! @@ -89,7 +89,7 @@ struct RecordResolver<'a> { } #[async_trait::async_trait] -impl SchemaResover for RecordResolver<'_> { +impl SchemaResolver for RecordResolver<'_> { async fn by_name(&self, name: &Name) -> Option { self.encoder .get_schema_and_id( diff --git a/tremor-codec/src/lib.rs b/tremor-codec/src/lib.rs index 7f58983b01..4aaf9dcf0b 100644 --- a/tremor-codec/src/lib.rs +++ b/tremor-codec/src/lib.rs @@ -59,7 +59,7 @@ mod prelude { pub use tremor_value::{literal, Object, Value}; } -/// A Codec +/// Configuration, commonly used for codecs pub type Config = tremor_config::NameWithConfig; #[async_trait::async_trait] diff --git a/tremor-interceptor/Cargo.toml b/tremor-interceptor/Cargo.toml index 3f5ffb56cd..eb34c80516 100644 --- a/tremor-interceptor/Cargo.toml +++ b/tremor-interceptor/Cargo.toml @@ -2,7 +2,7 @@ name = "tremor-interceptor" edition = "2021" authors = ["The Tremor Team"] -description = "Tremor Runtime" +description = "Tremor Preprocessors, Postprocessors, and maybe soon: Interceptors" documentation = "https://docs.tremor.rs" homepage = "https://www.tremor.rs" license = "Apache-2.0" From b753356311c805a5ba35be271261c224342d66f7 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Mon, 16 Oct 2023 09:40:00 +0200 Subject: [PATCH 8/9] include feedback Signed-off-by: Heinz N. Gies --- tremor-codec/src/codec/avro.rs | 2 +- ...gistry.rs => confluent_schema_registry.rs} | 33 ++++++++++--------- tremor-codec/src/lib.rs | 6 ++-- 3 files changed, 23 insertions(+), 18 deletions(-) rename tremor-codec/src/codec/{kafka_schema_registry.rs => confluent_schema_registry.rs} (82%) diff --git a/tremor-codec/src/codec/avro.rs b/tremor-codec/src/codec/avro.rs index 8412bce654..216d597674 100644 --- a/tremor-codec/src/codec/avro.rs +++ b/tremor-codec/src/codec/avro.rs @@ -14,7 +14,7 @@ //! The `avro` codec supports Apache Avro binary encoding. //! -//! 
The codec is configured with a schema following the avro schema specification //! //! ## Configuration //! diff --git a/tremor-codec/src/codec/kafka_schema_registry.rs b/tremor-codec/src/codec/confluent_schema_registry.rs similarity index 82% rename from tremor-codec/src/codec/kafka_schema_registry.rs rename to tremor-codec/src/codec/confluent_schema_registry.rs index 797ca092c5..b7961611b5 100644 --- a/tremor-codec/src/codec/kafka_schema_registry.rs +++ b/tremor-codec/src/codec/confluent_schema_registry.rs @@ -12,9 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! The `kafka-schema-registry` codec supports Apache Avro binary encoding. +//! The `confluent-schema-registry` codec allows using the [Confluent Schema Registry](https://docs.confluent.io/platform/current/schema-registry/index.html) +//! as a source for avro decoding information. //! -//! The codec is configured with a codec following the avro json codec specification +//! It can be used in combination with a kafka topic that encodes its content in avro format and stores its schema in the schema registry. +//! +//! For decoding avro data (from kafka or otherwise) that is manually encoded please use the [avro](./avro) codec. //! //! ## Configuration //! @@ -25,7 +28,7 @@ //!
The same as the [`avro` codec](./avro) use crate::{ - avro::{avro_to_value, value_to_avro, SchemaResover, SchemaWrapper}, + avro::{avro_to_value, value_to_avro, SchemaResolver, SchemaWrapper}, prelude::*, }; use apache_avro::schema::Name; @@ -36,7 +39,7 @@ use schema_registry_converter::{ }; use tremor_common::url::{HttpDefaults, Url}; -pub struct Ksr { +pub struct Csr { registry: Url, settings: SrSettings, decoder: AvroDecoder, @@ -44,16 +47,16 @@ pub struct Ksr { } #[allow(clippy::missing_fields_in_debug)] -impl std::fmt::Debug for Ksr { +impl std::fmt::Debug for Csr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("KSR") + f.debug_struct("Csr") .field("registry", &self.registry) .field("settings", &self.settings) .finish() } } -impl Clone for Ksr { +impl Clone for Csr { fn clone(&self) -> Self { Self { registry: self.registry.clone(), @@ -64,7 +67,7 @@ impl Clone for Ksr { } } -impl Ksr { +impl Csr { pub(crate) fn from_config(config: Option<&Value>) -> Result> { let url = config .get_str("url") @@ -75,7 +78,7 @@ impl Ksr { let settings = SrSettings::new(url.to_string()); let decoder = AvroDecoder::new(settings.clone()); let encoder = AvroEncoder::new(settings.clone()); - Ok(Box::new(Ksr { + Ok(Box::new(Csr { registry, settings, decoder, @@ -103,9 +106,9 @@ impl SchemaResolver for RecordResolver<'_> { } #[async_trait::async_trait()] -impl Codec for Ksr { +impl Codec for Csr { fn name(&self) -> &str { - "kafka-schema-registry" + "confluent-schema-registry" } async fn decode<'input>( @@ -161,20 +164,20 @@ mod test { #[test] fn test_codec_creation() { let config = literal!({"url":"http://localhost:8081"}); - let codec = Ksr::from_config(Some(&config)).expect("invalid config"); - assert_eq!(codec.name(), "kafka-schema-registry"); + let codec = Csr::from_config(Some(&config)).expect("invalid config"); + assert_eq!(codec.name(), "confluent-schema-registry"); } #[test] fn invalid_config() { let config = literal!({}); - let codec = 
Ksr::from_config(Some(&config)); + let codec = Csr::from_config(Some(&config)); assert!(codec.is_err()); } #[test] fn invalid_url() { let config = literal!({"url":"loc alhost:8081"}); - let codec = Ksr::from_config(Some(&config)); + let codec = Csr::from_config(Some(&config)); assert!(codec.is_err()); } } diff --git a/tremor-codec/src/lib.rs b/tremor-codec/src/lib.rs index 4aaf9dcf0b..795b01345f 100644 --- a/tremor-codec/src/lib.rs +++ b/tremor-codec/src/lib.rs @@ -34,12 +34,12 @@ mod codec { pub(crate) mod avro; pub(crate) mod binary; pub(crate) mod binflux; + pub(crate) mod confluent_schema_registry; pub(crate) mod csv; pub(crate) mod dogstatsd; pub(crate) mod influx; /// JSON codec pub mod json; - pub(crate) mod kafka_schema_registry; pub(crate) mod msgpack; pub(crate) mod null; pub(crate) mod statsd; @@ -122,7 +122,9 @@ impl Debug for dyn Codec { pub fn resolve(config: &Config) -> Result> { match config.name.as_str() { "avro" => avro::Avro::from_config(config.config.as_ref()), - "kafka-schema-registry" => kafka_schema_registry::Ksr::from_config(config.config.as_ref()), + "confluent-schema-registry" => { + confluent_schema_registry::Csr::from_config(config.config.as_ref()) + } "binary" => Ok(Box::new(binary::Binary {})), "binflux" => Ok(Box::::default()), "csv" => Ok(Box::new(csv::Csv {})), From 2d1aa1a51da888b9d53b6b6bbbed92feaafbae95 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Mon, 16 Oct 2023 10:26:27 +0200 Subject: [PATCH 9/9] simd-json update Signed-off-by: Heinz N. 
Gies --- Cargo.lock | 192 ++++++++++++++++++--------------- Cargo.toml | 4 +- tremor-api/Cargo.toml | 2 +- tremor-cli/Cargo.toml | 2 +- tremor-codec/Cargo.toml | 4 +- tremor-codec/src/codec/json.rs | 28 ++--- tremor-common/Cargo.toml | 4 +- tremor-config/Cargo.toml | 2 +- tremor-influx/Cargo.toml | 2 +- tremor-interceptor/Cargo.toml | 2 +- tremor-pipeline/Cargo.toml | 4 +- tremor-script/Cargo.toml | 6 +- tremor-value/Cargo.toml | 4 +- tremor-value/src/lib.rs | 6 +- tremor-value/src/value.rs | 13 ++- 15 files changed, 142 insertions(+), 133 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5fbdcdc047..38046cd0cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -252,9 +252,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb42b2197bf15ccb092b62c74515dbd8b86d0effd934795f6687c93b6e679a2c" +checksum = "f658e2baef915ba0f26f1f7c42bfb8e12f532a01f449a090ded75ae7a07e9ba2" dependencies = [ "flate2", "futures-core", @@ -334,7 +334,7 @@ dependencies = [ "log", "parking", "polling", - "rustix 0.37.24", + "rustix 0.37.25", "slab", "socket2 0.4.9", "waker-fn", @@ -362,7 +362,7 @@ dependencies = [ "cfg-if", "event-listener 3.0.0", "futures-lite", - "rustix 0.38.18", + "rustix 0.38.19", "windows-sys 0.48.0", ] @@ -389,7 +389,7 @@ dependencies = [ "cfg-if", "futures-core", "futures-io", - "rustix 0.38.18", + "rustix 0.38.19", "signal-hook-registry", "slab", "windows-sys 0.48.0", @@ -479,9 +479,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.73" +version = "0.1.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" +checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9" dependencies = [ "proc-macro2", "quote", @@ -580,7 +580,7 @@ dependencies = [ "http", "hyper", "ring 0.16.20", - "time 0.3.29", + "time 0.3.30", "tokio", 
"tower", "tracing", @@ -739,7 +739,7 @@ dependencies = [ "percent-encoding", "regex", "sha2 0.10.8", - "time 0.3.29", + "time 0.3.30", "tracing", ] @@ -917,7 +917,7 @@ dependencies = [ "num-integer", "ryu", "serde", - "time 0.3.29", + "time 0.3.30", ] [[package]] @@ -1047,9 +1047,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" [[package]] name = "block-buffer" @@ -1810,10 +1810,11 @@ checksum = "f578e8e2c440e7297e008bb5486a3a8a194775224bbc23729b0dbdfaeebf162e" [[package]] name = "deranged" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946" +checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3" dependencies = [ + "powerfmt", "serde", ] @@ -1915,11 +1916,11 @@ checksum = "212d0f5754cb6769937f4501cc0e67f4f4483c8d2c3e1e922ee9edbe4ab4c7c0" [[package]] name = "dissect" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1611d8efce6766178515e99dc2cd5727cd846acfc1649a750585af7187cbac00" +checksum = "927637a5b256e00fcdc83b705a2924cce2ecb4c82aeed16eb111102056b6d7e6" dependencies = [ - "simd-json", + "simd-json 0.12.0", ] [[package]] @@ -2120,9 +2121,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flate2" -version = "1.0.27" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" dependencies = [ 
"crc32fast", "miniz_oxide", @@ -2901,7 +2902,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ "hermit-abi 0.3.3", - "rustix 0.38.18", + "rustix 0.38.19", "windows-sys 0.48.0", ] @@ -3573,7 +3574,7 @@ version = "0.10.57" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bac25ee399abb46215765b1cb35bc0212377e58a061560d8b29b024fd0430e7c" dependencies = [ - "bitflags 2.4.0", + "bitflags 2.4.1", "cfg-if", "foreign-types", "libc", @@ -3923,6 +3924,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "325a6d2ac5dee293c3b2612d4993b98aec1dff096b0a2dae70ed7d95784a05da" +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -4002,7 +4009,7 @@ checksum = "7c003ac8c77cb07bb74f5f198bce836a689bcd5a42574612bf14d17bfd08c20e" dependencies = [ "bit-set", "bit-vec", - "bitflags 2.4.0", + "bitflags 2.4.1", "lazy_static", "num-traits", "rand 0.8.5", @@ -4271,32 +4278,32 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d119d7c7ca818f8a53c300863d4f87566aac09943aef5b355bb83969dae75d87" +checksum = "aaac441002f822bc9705a681810a4dd2963094b9ca0ddc41cb963a4c189189ea" dependencies = [ "aho-corasick", "memchr", "regex-automata", - "regex-syntax 0.8.1", + "regex-syntax 0.8.2", ] [[package]] name = "regex-automata" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "465c6fc0621e4abc4187a2bda0937bfd4f722c2730b29562e19689ea796c9a4b" +checksum = "5011c7e263a695dc8ca064cddb722af1be54e517a280b12a5356f98366899e5d" dependencies = [ "aho-corasick", 
"memchr", - "regex-syntax 0.8.1", + "regex-syntax 0.8.2", ] [[package]] name = "regex-lite" -version = "0.1.3" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a6ebcd15653947e6140f59a9811a06ed061d18a5c35dfca2e2e4c5525696878" +checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e" [[package]] name = "regex-syntax" @@ -4306,9 +4313,9 @@ checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" [[package]] name = "regex-syntax" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56d84fdd47036b038fc80dd333d10b6aab10d5d31f4a366e20014def75328d33" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "reqwest" @@ -4385,9 +4392,9 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.3" +version = "0.17.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9babe80d5c16becf6594aa32ad2be8fe08498e7ae60b77de8df700e67f191d7e" +checksum = "fce3045ffa7c981a6ee93f640b538952e155f1ae3a1a02b84547fc7a56b7059a" dependencies = [ "cc", "getrandom 0.2.10", @@ -4497,9 +4504,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.24" +version = "0.37.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4279d76516df406a8bd37e7dff53fd37d1a093f997a3c34a5c21658c126db06d" +checksum = "d4eb579851244c2c03e7c24f501c3432bed80b8f720af1d6e5b0e0f01555a035" dependencies = [ "bitflags 1.3.2", "errno", @@ -4511,11 +4518,11 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.18" +version = "0.38.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a74ee2d7c2581cd139b42447d7d9389b889bdaad3a73f1ebb16f2a3237bb19c" +checksum = "745ecfa778e66b2b63c88a61cb36e0eea109e803b0b86bf9879fbc77c70e86ed" dependencies = [ - "bitflags 2.4.0", + "bitflags 2.4.1", "errno", "libc", "linux-raw-sys 0.4.10", @@ 
-4658,7 +4665,7 @@ dependencies = [ [[package]] name = "schema_registry_converter" version = "3.1.0" -source = "git+https://github.com/tremor-rs/schema_registry_converter.git?branch=housekeeping#8df31e3e250ead445349432a5b5803d0c9985f2d" +source = "git+https://github.com/tremor-rs/schema_registry_converter.git?branch=housekeeping#cc85ba752f99ad43ec4d1f43d4018ea768888bb0" dependencies = [ "apache-avro", "byteorder", @@ -4742,9 +4749,9 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" -version = "1.0.188" +version = "1.0.189" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +checksum = "8e422a44e74ad4001bdc8eede9a4570ab52f71190e9c076d14369f38b9200537" dependencies = [ "serde_derive", ] @@ -4761,9 +4768,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.188" +version = "1.0.189" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +checksum = "1e48d1f918009ce3145511378cf68d613e3b3d9137d67272562080d68a2b32d5" dependencies = [ "proc-macro2", "quote", @@ -4895,7 +4902,7 @@ dependencies = [ "serde", "serde-value", "serde_json", - "time 0.3.29", + "time 0.3.30", "tokio", "tracing", "typemap_rev", @@ -5042,29 +5049,46 @@ dependencies = [ "value-trait", ] +[[package]] +name = "simd-json" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0f07a84c7456b901b8dd2c1d44caca8b0fd2c2616206ee5acc9d9da61e8d9ec" +dependencies = [ + "ahash", + "getrandom 0.2.10", + "halfbrown", + "lexical-core 0.8.5", + "once_cell", + "serde", + "serde_json", + "simdutf8", + "value-trait", +] + [[package]] name = "simd-json-derive" -version = "0.11.3" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"aaa9752381f22a384f50938d39bb22d34053a6672f0fbadb38716e7ba5821f33" +checksum = "601202a467f1d17a37b20d6d4d7bd679764ac3e9e9fc0f7a24524ba35bd55552" dependencies = [ "chrono", "itoa 1.0.9", "ryu", - "simd-json", + "simd-json 0.12.0", "simd-json-derive-int", "value-trait", ] [[package]] name = "simd-json-derive-int" -version = "0.11.3" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a53e29f4fa3048cfe9452895aa77d52b7d9ecd930011e780368cc5985866006" +checksum = "4826470c4520fac1e3b57c413a6e83a198bac992e3222d1c290f7909881d3df1" dependencies = [ "proc-macro2", "quote", - "simd-json", + "simd-json 0.12.0", "syn 2.0.38", ] @@ -5293,9 +5317,9 @@ checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" [[package]] name = "strum_macros" -version = "0.25.2" +version = "0.25.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad8d03b598d3d0fff69bf533ee3ef19b8eeb342729596df84bcc7e1f96ec4059" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" dependencies = [ "heck 0.4.1", "proc-macro2", @@ -5498,7 +5522,7 @@ dependencies = [ "cfg-if", "fastrand 2.0.1", "redox_syscall 0.3.5", - "rustix 0.38.18", + "rustix 0.38.19", "windows-sys 0.48.0", ] @@ -5655,12 +5679,13 @@ dependencies = [ [[package]] name = "time" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "426f806f4089c493dcac0d24c29c01e2c38baf8e30f1b716ee37e83d200b18fe" +checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" dependencies = [ "deranged", "itoa 1.0.9", + "powerfmt", "serde", "time-core", "time-macros 0.2.15", @@ -5978,11 +6003,10 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.37" +version = "0.1.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +checksum = "ee2ef2af84856a50c1d430afce2fdded0a4ec7eda868db86409b4543df0797f9" dependencies = [ - "cfg-if", "log", "pin-project-lite 0.2.13", "tracing-attributes", @@ -5991,9 +6015,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", @@ -6002,9 +6026,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", ] @@ -6029,7 +6053,7 @@ dependencies = [ "log", "serde", "serde_yaml 0.9.25", - "simd-json", + "simd-json 0.12.0", "surf", "tide", "tokio", @@ -6067,7 +6091,7 @@ dependencies = [ "shell-words", "signal-hook", "signal-hook-tokio", - "simd-json", + "simd-json 0.12.0", "snmalloc-rs", "surf", "tch", @@ -6108,7 +6132,7 @@ dependencies = [ "schema_registry_converter", "serde", "serde_yaml 0.9.25", - "simd-json", + "simd-json 0.12.0", "simd-json-derive", "simdutf8", "syslog_loose", @@ -6132,7 +6156,7 @@ dependencies = [ "rand 0.8.5", "regex", "serde", - "simd-json", + "simd-json 0.12.0", "simd-json-derive", "test-case", "tokio", @@ -6144,7 +6168,7 @@ name = "tremor-config" version = "0.13.0-rc.16" dependencies = [ "serde", - "simd-json", + "simd-json 0.12.0", "tremor-value", ] @@ -6155,7 +6179,7 @@ dependencies = [ "criterion", "lexical", "pretty_assertions", - "simd-json", + "simd-json 0.12.0", "snmalloc-rs", "value-trait", ] @@ -6174,7 +6198,7 @@ dependencies = [ "proptest", "rand 0.8.5", "serde", - 
"simd-json", + "simd-json 0.12.0", "snap", "tremor-common", "tremor-config", @@ -6191,7 +6215,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "58f4167091558a2c0a46b183cf1a005b1e9e0e1aaeb365ad8d0847685ffc8542" dependencies = [ "serde", - "simd-json", + "simd-json 0.11.1", ] [[package]] @@ -6225,7 +6249,7 @@ dependencies = [ "rust-bert", "serde", "serde_yaml 0.9.25", - "simd-json", + "simd-json 0.12.0", "simd-json-derive", "sled", "tempfile", @@ -6314,7 +6338,7 @@ dependencies = [ "serial_test", "signal-hook", "signal-hook-tokio", - "simd-json", + "simd-json 0.12.0", "simd-json-derive", "simdutf8", "sled", @@ -6377,7 +6401,7 @@ dependencies = [ "regex", "serde", "sha2 0.10.8", - "simd-json", + "simd-json 0.12.0", "simd-json-derive", "sketches-ddsketch", "strip-ansi-escapes", @@ -6407,7 +6431,7 @@ dependencies = [ "proptest", "serde", "serde_json", - "simd-json", + "simd-json 0.12.0", "simd-json-derive", "tremor-common", "value-trait", @@ -6415,9 +6439,9 @@ dependencies = [ [[package]] name = "trust-dns-proto" -version = "0.23.0" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dc775440033cb114085f6f2437682b194fa7546466024b1037e82a48a052a69" +checksum = "559ac980345f7f5020883dd3bcacf176355225e01916f8c2efecad7534f682c6" dependencies = [ "async-trait", "cfg-if", @@ -6440,9 +6464,9 @@ dependencies = [ [[package]] name = "trust-dns-resolver" -version = "0.23.0" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff7aed33ef3e8bf2c9966fccdfed93f93d46f432282ea875cd66faabc6ef2f" +checksum = "c723b0e608b24ad04c73b2607e0241b2c98fd79795a95e98b068b6966138a29d" dependencies = [ "cfg-if", "futures-util", @@ -6929,7 +6953,7 @@ version = "0.22.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed63aea5ce73d0ff405984102c42de94fc55a6b75765d621c65262469b3c9b53" dependencies = [ - "ring 0.17.3", + "ring 0.17.4", "untrusted 0.9.0", 
] @@ -6975,7 +6999,7 @@ dependencies = [ "either", "home", "once_cell", - "rustix 0.38.18", + "rustix 0.38.19", ] [[package]] @@ -7172,9 +7196,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "winnow" -version = "0.5.16" +version = "0.5.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "037711d82167854aff2018dfd193aa0fef5370f456732f0d5a0c59b0f1b4b907" +checksum = "a3b801d0e0a6726477cc207f60162da452f3a95adb368399bef20a946e06f65c" dependencies = [ "memchr", ] @@ -7250,7 +7274,7 @@ dependencies = [ "hmac 0.12.1", "pbkdf2", "sha1 0.10.6", - "time 0.3.29", + "time 0.3.30", "zstd 0.11.2+zstd.1.5.2", ] diff --git a/Cargo.toml b/Cargo.toml index a1e43791e0..c6fc25c7b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -83,8 +83,8 @@ rand = "0.8.5" regex = "1.9" serde = { version = "1", features = ["derive"] } serde_yaml = "0.9" -simd-json = { version = "0.11", features = ["known-key"] } -simd-json-derive = "0.11" +simd-json = { version = "0.12", features = ["known-key"] } +simd-json-derive = "0.12" socket2 = { version = "0.5", features = ["all"] } tremor-common = { path = "tremor-common" } tremor-config = { path = "tremor-config" } diff --git a/tremor-api/Cargo.toml b/tremor-api/Cargo.toml index b91896a400..bcc00b604d 100644 --- a/tremor-api/Cargo.toml +++ b/tremor-api/Cargo.toml @@ -12,7 +12,7 @@ http-types = "2" log = "0.4" serde = "1" serde_yaml = "0.9" -simd-json = "0.11" +simd-json = "0.12" tokio = { version = "1.32", features = ["full"] } # we don't need sessions or cookies or shitty logging middleware tide = { version = "0.16", default-features = false, features = ["h1-server"] } diff --git a/tremor-cli/Cargo.toml b/tremor-cli/Cargo.toml index 37409bdc49..21cdd0ce15 100644 --- a/tremor-cli/Cargo.toml +++ b/tremor-cli/Cargo.toml @@ -36,7 +36,7 @@ serde = "1" serde_yaml = "0.9" signal-hook = "0.3" signal-hook-tokio = { version = "0.3", features = ["futures-v0_3"] } -simd-json = { version = 
"0.11", features = ["known-key"] } +simd-json = { version = "0.12", features = ["known-key"] } # We need to stay with 0.2 for now as there are reasons that can be named for the need to be able to # compile and run on operating systems that are a decade old. (insert apropriate ammount rage) snmalloc-rs = { version = "0.3" } diff --git a/tremor-codec/Cargo.toml b/tremor-codec/Cargo.toml index fc99b61921..6e61d0689e 100644 --- a/tremor-codec/Cargo.toml +++ b/tremor-codec/Cargo.toml @@ -18,7 +18,7 @@ tokio = { version = "1.32", features = ["full"] } async-trait = "0.1" error-chain = "0.12" futures = "0.3" -simd-json-derive = "0.11" +simd-json-derive = "0.12" value-trait = "0.6" beef = "0.5" test-case = "3.1" @@ -46,7 +46,7 @@ reqwest = { version = "0.11", default-features = false, features = [ ] } csv = "1.2" tremor-influx = { version = "0.13.0-rc.16", path = "../tremor-influx" } -simd-json = "0.11" +simd-json = "0.12" apache-avro = { version = "0.16", features = [ "snappy", "bzip", diff --git a/tremor-codec/src/codec/json.rs b/tremor-codec/src/codec/json.rs index e5cc32c4cc..2a57a89e69 100644 --- a/tremor-codec/src/codec/json.rs +++ b/tremor-codec/src/codec/json.rs @@ -23,9 +23,9 @@ //! The codec can be configured with a mode, either `sorted` or `unsorted`. The default is `unsorted` as it is singnificantly faster, `sorted` json is only needed in testing situations where the key order in maps matters for compairson. 
use crate::prelude::*; -use std::{cmp::max, marker::PhantomData}; +use simd_json::Buffers; +use std::marker::PhantomData; use tremor_value::utils::sorted_serialize; -use tremor_value::AlignedBuf; /// Sorting for JSON pub trait Sorting: Sync + Send + Copy + Clone + 'static { @@ -50,8 +50,7 @@ impl Sorting for Unsorted { /// JSON codec pub struct Json { _phantom: PhantomData, - input_buffer: AlignedBuf, - string_buffer: Vec, + buffers: Buffers, data_buf: Vec, } @@ -65,8 +64,7 @@ impl Default for Json { fn default() -> Self { Self { _phantom: PhantomData, - input_buffer: AlignedBuf::with_capacity(1024), - string_buffer: vec![0u8; 1024], + buffers: Buffers::new(1024), data_buf: Vec::new(), } } @@ -103,18 +101,9 @@ impl Codec for Json { _ingest_ns: u64, meta: Value<'input>, ) -> Result, Value<'input>)>> { - // The input buffer will be automatically grown if required - if self.string_buffer.capacity() < data.len() { - let new_len = max(self.string_buffer.capacity(), data.len()) * 2; - self.string_buffer.resize(new_len, 0); - } - tremor_value::parse_to_value_with_buffers( - data, - &mut self.input_buffer, - &mut self.string_buffer, - ) - .map(|v| Some((v, meta))) - .map_err(Error::from) + tremor_value::parse_to_value_with_buffers(data, &mut self.buffers) + .map(|v| Some((v, meta))) + .map_err(Error::from) } async fn encode(&mut self, data: &Value, _meta: &Value) -> Result> { if S::SORTED { @@ -140,8 +129,7 @@ mod test { #[tokio::test(flavor = "multi_thread")] async fn decode() -> Result<()> { let mut codec: Json = Json { - input_buffer: AlignedBuf::with_capacity(0), - string_buffer: Vec::new(), + buffers: Buffers::default(), ..Default::default() }; let expected = literal!({ "snot": "badger" }); diff --git a/tremor-common/Cargo.toml b/tremor-common/Cargo.toml index 2f6061fc45..f1821edc4d 100644 --- a/tremor-common/Cargo.toml +++ b/tremor-common/Cargo.toml @@ -14,8 +14,8 @@ rand = { version = "0.8", features = ["small_rng"] } beef = { version = "0.5", features = 
["impl_serde"] } serde = "1" url = { version = "2", features = ["serde"] } -simd-json = { version = "0.11", features = ["known-key"] } -simd-json-derive = "0.11" +simd-json = { version = "0.12", features = ["known-key"] } +simd-json-derive = "0.12" base64 = "0.21" regex = "*" lazy_static = "*" diff --git a/tremor-config/Cargo.toml b/tremor-config/Cargo.toml index c9e993aac2..a33af6e152 100644 --- a/tremor-config/Cargo.toml +++ b/tremor-config/Cargo.toml @@ -14,4 +14,4 @@ version = "0.13.0-rc.16" [dependencies] tremor-value = { path = "../tremor-value" } serde = "1" -simd-json = "0.11" +simd-json = "0.12" diff --git a/tremor-influx/Cargo.toml b/tremor-influx/Cargo.toml index f9c128e51f..a157f49207 100644 --- a/tremor-influx/Cargo.toml +++ b/tremor-influx/Cargo.toml @@ -23,7 +23,7 @@ value-trait = "0.6" [dev-dependencies] criterion = "0.5" pretty_assertions = "1.4" -simd-json = "0.11" +simd-json = "0.12" snmalloc-rs = "0.3" [[bench]] diff --git a/tremor-interceptor/Cargo.toml b/tremor-interceptor/Cargo.toml index eb34c80516..df826393b2 100644 --- a/tremor-interceptor/Cargo.toml +++ b/tremor-interceptor/Cargo.toml @@ -20,7 +20,7 @@ tremor-config = { path = "../tremor-config" } tremor-common = { path = "../tremor-common" } log = "0.4" serde = { version = "1", features = ["derive"] } -simd-json = "0.11" +simd-json = "0.12" libflate = "2" xz2 = "0.1" lz4 = "1" diff --git a/tremor-pipeline/Cargo.toml b/tremor-pipeline/Cargo.toml index 20e16ae39a..924fd1c65c 100644 --- a/tremor-pipeline/Cargo.toml +++ b/tremor-pipeline/Cargo.toml @@ -22,8 +22,8 @@ regex = "1" rust-bert = { version = "0.21.0", optional = true } serde = "1" serde_yaml = "0.9" -simd-json = { version = "0.11", features = ["known-key"] } -simd-json-derive = "0.11" +simd-json = { version = "0.12", features = ["known-key"] } +simd-json-derive = "0.12" sled = "0.34" tremor-common = { version = "0.13.0-rc.16", path = "../tremor-common" } tremor-config = { version = "0.13.0-rc.16", path = "../tremor-config" } diff 
--git a/tremor-script/Cargo.toml b/tremor-script/Cargo.toml index f667187412..806b5144fe 100644 --- a/tremor-script/Cargo.toml +++ b/tremor-script/Cargo.toml @@ -46,8 +46,8 @@ percent-encoding = "2" rand = { version = "0.8", features = ["small_rng"] } regex = "1" serde = { version = "1", features = ["derive"] } -simd-json = { version = "0.11", features = ["known-key"] } -simd-json-derive = "0.11" +simd-json = { version = "0.12", features = ["known-key"] } +simd-json-derive = "0.12" sketches-ddsketch = "0.2" strip-ansi-escapes = "0.2" termcolor = "1.2" @@ -79,8 +79,6 @@ test-case = "3" erlang-float-testing = [] # This is required for the language server to prevent unbounded growth of the area arena-delete = [] -# This is required for the language server as w3e want to allow the use of it without platfor specific flags -allow-non-simd = ["simd-json/allow-non-simd"] [[bench]] name = "array_flatten" diff --git a/tremor-value/Cargo.toml b/tremor-value/Cargo.toml index c5551ada3f..16e2eecc94 100644 --- a/tremor-value/Cargo.toml +++ b/tremor-value/Cargo.toml @@ -16,8 +16,8 @@ base64 = "0.21" beef = "0.5" halfbrown = "0.2" serde = "1.0" -simd-json = "0.11" -simd-json-derive = "0.11" +simd-json = "0.12" +simd-json-derive = "0.12" value-trait = { version = "0.6", features = ["custom-types"] } tremor-common = { version = "0.13.0-rc.16", path = "../tremor-common" } diff --git a/tremor-value/src/lib.rs b/tremor-value/src/lib.rs index f16f066694..c48aca4daa 100644 --- a/tremor-value/src/lib.rs +++ b/tremor-value/src/lib.rs @@ -49,7 +49,7 @@ pub mod value; pub use crate::serde::structurize; pub use error::*; pub use known_key::{Error as KnownKeyError, KnownKey}; -pub use simd_json::{json, json_typed, AlignedBuf, StaticNode}; +pub use simd_json::{json, json_typed, Buffers, StaticNode}; pub use value::from::*; pub use value::{parse_to_value, parse_to_value_with_buffers, to_value, Object, Value}; @@ -102,8 +102,8 @@ impl<'input, 'tape> ValueDeser<'input, 'tape> { match self.0.next() 
{ Some(Node::Static(s)) => Ok(Value::Static(s)), Some(Node::String(s)) => Ok(Value::from(s)), - Some(Node::Array(len, _)) => Ok(self.parse_array(len)), - Some(Node::Object(len, _)) => Ok(self.parse_map(len)), + Some(Node::Array { len, .. }) => Ok(self.parse_array(len)), + Some(Node::Object { len, .. }) => Ok(self.parse_map(len)), None => Err(simd_json::Error::generic(simd_json::ErrorType::Eof)), } } diff --git a/tremor-value/src/value.rs b/tremor-value/src/value.rs index 47ce63ba88..fe160e6dd7 100644 --- a/tremor-value/src/value.rs +++ b/tremor-value/src/value.rs @@ -25,8 +25,8 @@ use crate::{Error, Result}; use beef::Cow; use halfbrown::HashMap; pub use r#static::StaticValue; -use simd_json::{prelude::*, ObjectHasher}; -use simd_json::{AlignedBuf, Deserializer, Node, StaticNode}; +use simd_json::{prelude::*, Buffers, ObjectHasher}; +use simd_json::{Deserializer, Node, StaticNode}; use std::{borrow::Borrow, convert::TryInto, fmt}; use std::{cmp::Ord, hash::Hash}; use std::{ @@ -64,10 +64,9 @@ pub fn parse_to_value(s: &mut [u8]) -> Result { /// Will return `Err` if `s` is invalid JSON. pub fn parse_to_value_with_buffers<'value>( s: &'value mut [u8], - input_buffer: &mut AlignedBuf, - string_buffer: &mut [u8], + buffer: &mut Buffers, ) -> Result> { - match Deserializer::from_slice_with_buffers(s, input_buffer, string_buffer) { + match Deserializer::from_slice_with_buffers(s, buffer) { Ok(de) => Ok(ValueDeserializer::from_deserializer(de).parse()), Err(e) => Err(Error::SimdJson(e)), } @@ -600,8 +599,8 @@ impl<'de> ValueDeserializer<'de> { match unsafe { self.0.next_() } { Node::Static(s) => Value::Static(s), Node::String(s) => Value::from(s), - Node::Array(len, _) => self.parse_array(len), - Node::Object(len, _) => self.parse_map(len), + Node::Array { len, .. } => self.parse_array(len), + Node::Object { len, .. } => self.parse_map(len), } }