feat: Custom Flattening for OTEL logs, metrics and traces #1043

Merged
Commits (22)
53ad2ba  feat: custom flattening for OTEL data (nikhilsinhaparseable, Dec 17, 2024)
58a38e9  feat: enable OTEL traces (nikhilsinhaparseable, Dec 19, 2024)
90b2433  update data type based on data from otel collector (nikhilsinhaparseable, Dec 19, 2024)
30625b0  fix: refactor, deepsource fixes (nikhilsinhaparseable, Dec 25, 2024)
95c2c1e  fix: cargo hack, incorporate review comments (nikhilsinhaparseable, Dec 25, 2024)
04347bf  Update Cargo.toml (nikhilsinhaparseable, Dec 26, 2024)
4a45a5d  fix: removed pre-compiled otel files (nikhilsinhaparseable, Dec 26, 2024)
a74bb0b  restructure: moved otel module from handlers to src (nikhilsinhaparseable, Dec 27, 2024)
6eea205  suggestions (de-sh, Dec 27, 2024)
4bcf8f0  Update Cargo.toml (nikhilsinhaparseable, Dec 27, 2024)
e6ad4f0  dependency: update opentelemetry-proto dependency to parseable fork (nikhilsinhaparseable, Jan 2, 2025)
b8a9416  fix: trace_id, span_id mapping (nikhilsinhaparseable, Jan 3, 2025)
e83ed88  transform: epoch nanosecond number to timestamp (nikhilsinhaparseable, Jan 3, 2025; see the sketch after this list)
1681a75  bucket counts, explicit bounds, quantile values as json array in OTEL… (nikhilsinhaparseable, Jan 3, 2025)
fa70652  restrict /api/v1/ingest api for otel (nikhilsinhaparseable, Jan 3, 2025)
47bbb80  restrict generic flattening for otel data (nikhilsinhaparseable, Jan 3, 2025)
67b4c5a  fix: create metric record in case of empty data (nikhilsinhaparseable, Jan 4, 2025)
189197c  fix: corrected attribute-intValue to json number (nikhilsinhaparseable, Jan 5, 2025)
2f091cb  refactor: `LogSource` enum (de-sh, Jan 4, 2025)
ef0f4e7  fix: generic flattening only for non otel sources (de-sh, Jan 5, 2025)
ae8f826  linter fix (nikhilsinhaparseable, Jan 5, 2025)
6908630  fix: rebase from main (nikhilsinhaparseable, Jan 5, 2025)
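
Several of these commits reshape raw OTLP values before ingestion; e83ed88, for instance, converts the epoch-nanosecond fields (timeUnixNano and the like) into proper timestamps. A minimal sketch of that kind of conversion, written here with chrono and not taken from the PR itself:

```rust
// Hedged sketch only, not the PR's implementation: convert an OTLP
// `timeUnixNano` value (u64 nanoseconds since the Unix epoch) into an
// RFC 3339 timestamp string using chrono.
use chrono::DateTime;

fn epoch_nanos_to_rfc3339(time_unix_nano: u64) -> Option<String> {
    let secs = (time_unix_nano / 1_000_000_000) as i64;
    let nanos = (time_unix_nano % 1_000_000_000) as u32;
    // DateTime::from_timestamp takes whole seconds plus the sub-second nanos.
    DateTime::from_timestamp(secs, nanos).map(|dt| dt.to_rfc3339())
}

fn main() {
    // 2024-01-01T00:00:00Z expressed in nanoseconds since the epoch.
    println!("{:?}", epoch_nanos_to_rfc3339(1_704_067_200_000_000_000));
}
```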
150 changes: 107 additions & 43 deletions Cargo.lock

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions Cargo.toml
@@ -64,9 +64,10 @@ humantime-serde = "1.1"
itertools = "0.13.0"
num_cpus = "1.15"
once_cell = "1.17.1"
opentelemetry-proto = {git = "https://github.com/parseablehq/opentelemetry-rust", branch="fix-metrics-u64-serialization"}
prometheus = { version = "0.13", features = ["process"] }
rand = "0.8.5"
rdkafka = {version = "0.36.2", default-features = false, features = ["tokio"]}
rdkafka = { version = "0.36.2", default-features = false, features = ["tokio"] }
regex = "1.7.3"
relative-path = { version = "1.7", features = ["serde"] }
reqwest = { version = "0.11.27", default-features = false, features = [
@@ -80,7 +81,7 @@ serde = { version = "1.0", features = ["rc", "derive"] }
serde_json = "1.0"
static-files = "0.2"
sysinfo = "0.31.4"
thiserror = "1.0.64"
thiserror = "2.0.0"
thread-priority = "1.0.0"
tokio = { version = "1.28", default-features = false, features = [
"sync",
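The new opentelemetry-proto entry pins a Parseable fork (branch fix-metrics-u64-serialization) that supplies the generated OTLP types the flattening code works against. A hedged sketch of the kind of deserialization this enables, assuming the crate is built with the features that produce serde-enabled tonic types:

```rust
// Sketch under stated assumptions: the opentelemetry-proto crate exposes
// `tonic::logs::v1::LogsData` with serde derives (its gen-tonic/with-serde
// features). Parses an OTLP/JSON logs payload and counts the log records.
use opentelemetry_proto::tonic::logs::v1::LogsData;

fn count_log_records(body: &[u8]) -> Result<usize, serde_json::Error> {
    let logs: LogsData = serde_json::from_slice(body)?;
    Ok(logs
        .resource_logs
        .iter()
        .flat_map(|resource| &resource.scope_logs)
        .map(|scope| scope.log_records.len())
        .sum())
}
```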
13 changes: 11 additions & 2 deletions src/event/format/json.rs
@@ -29,7 +29,7 @@ use serde_json::Value;
use std::{collections::HashMap, sync::Arc};
use tracing::error;

use super::{EventFormat, Metadata, Tags};
use super::{EventFormat, LogSource, Metadata, Tags};
use crate::{
metadata::SchemaVersion,
utils::{arrow::get_field, json::flatten_json_body},
@@ -52,8 +52,17 @@ impl EventFormat for Event {
static_schema_flag: Option<&String>,
time_partition: Option<&String>,
schema_version: SchemaVersion,
log_source: &LogSource,
) -> Result<(Self::Data, Vec<Arc<Field>>, bool, Tags, Metadata), anyhow::Error> {
let data = flatten_json_body(self.data, None, None, None, schema_version, false)?;
let data = flatten_json_body(
self.data,
None,
None,
None,
schema_version,
false,
log_source,
)?;
let stream_schema = schema;

// incoming event may be a single json or a json array
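The json.rs change threads the log source through flatten_json_body; together with commits 47bbb80 and ef0f4e7, the intent is that OTEL payloads skip generic nested-JSON flattening because the dedicated otel module has already shaped them. An illustrative sketch of that gating, with the function signature simplified and generic_flatten standing in for the crate's real helper:

```rust
use serde_json::Value;

// Mirrors the LogSource enum added in this PR (see src/event/format/mod.rs below).
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub enum LogSource {
    Kinesis,
    OtelLogs,
    OtelMetrics,
    OtelTraces,
    #[default]
    Json,
    Custom(String),
}

// Hypothetical gate, not the PR's code: OTEL sources bypass generic
// flattening, since the otel-specific path has already flattened them.
fn flatten_json_body(body: Value, log_source: &LogSource) -> Value {
    match log_source {
        LogSource::OtelLogs | LogSource::OtelMetrics | LogSource::OtelTraces => body,
        _ => generic_flatten(body),
    }
}

fn generic_flatten(body: Value) -> Value {
    // Placeholder for the crate's recursive key-path flattening.
    body
}
```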
35 changes: 35 additions & 0 deletions src/event/format/mod.rs
@@ -43,6 +43,38 @@ type Tags = String;
type Metadata = String;
type EventSchema = Vec<Arc<Field>>;

/// Source of the logs, used to perform special processing for certain sources
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub enum LogSource {
// AWS Kinesis sends logs in the format of a json array
Kinesis,
// OpenTelemetry sends logs according to the specification as explained here
// https://github.com/open-telemetry/opentelemetry-proto/tree/v1.0.0/opentelemetry/proto/logs/v1
OtelLogs,
// OpenTelemetry sends metrics according to the specification as explained here
// https://github.com/open-telemetry/opentelemetry-proto/tree/v1.0.0/opentelemetry/proto/metrics/v1
OtelMetrics,
// OpenTelemetry sends traces according to the specification as explained here
// https://github.com/open-telemetry/opentelemetry-proto/blob/v1.0.0/opentelemetry/proto/trace/v1/trace.proto
OtelTraces,
#[default]
// Json object or array
Json,
Custom(String),
}

impl From<&str> for LogSource {
fn from(s: &str) -> Self {
match s {
"kinesis" => LogSource::Kinesis,
"otel-logs" => LogSource::OtelLogs,
"otel-metrics" => LogSource::OtelMetrics,
"otel-traces" => LogSource::OtelTraces,
custom => LogSource::Custom(custom.to_owned()),
}
}
}

// Global Trait for event format
// This trait is implemented by all the event formats
pub trait EventFormat: Sized {
@@ -54,6 +86,7 @@ pub trait EventFormat: Sized {
static_schema_flag: Option<&String>,
time_partition: Option<&String>,
schema_version: SchemaVersion,
log_source: &LogSource,
) -> Result<(Self::Data, EventSchema, bool, Tags, Metadata), AnyError>;

fn decode(data: Self::Data, schema: Arc<Schema>) -> Result<RecordBatch, AnyError>;
@@ -64,12 +97,14 @@
static_schema_flag: Option<&String>,
time_partition: Option<&String>,
schema_version: SchemaVersion,
log_source: &LogSource,
) -> Result<(RecordBatch, bool), AnyError> {
let (data, mut schema, is_first, tags, metadata) = self.to_data(
storage_schema,
static_schema_flag,
time_partition,
schema_version,
log_source,
)?;

// DEFAULT_TAGS_KEY, DEFAULT_METADATA_KEY and DEFAULT_TIMESTAMP_KEY are reserved field names
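
A short usage sketch for the conversion above, assuming the LogSource enum and its From<&str> impl from the mod.rs diff are in scope: a source hint taken from the ingest request (for example a header value) selects the variant, unknown strings fall back to Custom, and a missing hint defaults to Json.

```rust
// Assumes the LogSource enum and its From<&str> impl from the diff above.
fn log_source_from_header(header_value: Option<&str>) -> LogSource {
    // A missing hint falls back to the default variant (Json).
    header_value.map(LogSource::from).unwrap_or_default()
}

fn main() {
    assert_eq!(log_source_from_header(Some("otel-logs")), LogSource::OtelLogs);
    assert_eq!(log_source_from_header(Some("kinesis")), LogSource::Kinesis);
    // Unknown values become Custom; no hint at all means Json.
    assert_eq!(
        log_source_from_header(Some("nginx")),
        LogSource::Custom("nginx".to_owned())
    );
    assert_eq!(log_source_from_header(None), LogSource::Json);
}
```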