Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(external docs): generate global option configuration automatically from Rust code #22345

Merged
merged 18 commits into from
Feb 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions lib/vector-config-common/src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ pub const COMPONENT_TYPE_SECRETS: &str = "secrets";
pub const COMPONENT_TYPE_SINK: &str = "sink";
pub const COMPONENT_TYPE_SOURCE: &str = "source";
pub const COMPONENT_TYPE_TRANSFORM: &str = "transform";
pub const COMPONENT_TYPE_GLOBAL_OPTION: &str = "global_option";
pub const DOCS_META_ADDITIONAL_PROPS_DESC: &str = "docs::additional_props_description";
pub const DOCS_META_ADVANCED: &str = "docs::advanced";
pub const DOCS_META_COMPONENT_BASE_TYPE: &str = "docs::component_base_type";
Expand All @@ -32,6 +33,7 @@ pub const METADATA: &str = "_metadata";
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ComponentType {
EnrichmentTable,
GlobalOption,
Provider,
Secrets,
Sink,
Expand All @@ -44,6 +46,7 @@ impl ComponentType {
pub const fn as_str(&self) -> &'static str {
match self {
ComponentType::EnrichmentTable => COMPONENT_TYPE_ENRICHMENT_TABLE,
ComponentType::GlobalOption => COMPONENT_TYPE_GLOBAL_OPTION,
ComponentType::Provider => COMPONENT_TYPE_PROVIDER,
ComponentType::Secrets => COMPONENT_TYPE_SECRETS,
ComponentType::Sink => COMPONENT_TYPE_SINK,
Expand All @@ -63,6 +66,7 @@ impl<'a> TryFrom<&'a str> for ComponentType {
fn try_from(value: &'a str) -> Result<Self, Self::Error> {
match value {
COMPONENT_TYPE_ENRICHMENT_TABLE => Ok(ComponentType::EnrichmentTable),
COMPONENT_TYPE_GLOBAL_OPTION => Ok(ComponentType::GlobalOption),
COMPONENT_TYPE_PROVIDER => Ok(ComponentType::Provider),
COMPONENT_TYPE_SECRETS => Ok(ComponentType::Secrets),
COMPONENT_TYPE_SINK => Ok(ComponentType::Sink),
Expand Down
1 change: 1 addition & 0 deletions lib/vector-config-macros/src/attrs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ impl AttributeIdent {

pub const NO_SER: AttributeIdent = AttributeIdent("no_ser");
pub const NO_DESER: AttributeIdent = AttributeIdent("no_deser");
pub const GLOBAL_OPTION_COMPONENT: AttributeIdent = AttributeIdent("global_option_component");
pub const ENRICHMENT_TABLE_COMPONENT: AttributeIdent = AttributeIdent("enrichment_table_component");
pub const PROVIDER_COMPONENT: AttributeIdent = AttributeIdent("provider_component");
pub const SECRETS_COMPONENT: AttributeIdent = AttributeIdent("secrets_component");
Expand Down
1 change: 1 addition & 0 deletions lib/vector-config-macros/src/component_name.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ fn attr_to_component_name(attr: &Attribute) -> Result<Option<String>, Error> {
attr.path(),
&[
attrs::ENRICHMENT_TABLE_COMPONENT,
attrs::GLOBAL_OPTION_COMPONENT,
attrs::PROVIDER_COMPONENT,
attrs::SINK_COMPONENT,
attrs::SOURCE_COMPONENT,
Expand Down
4 changes: 4 additions & 0 deletions lib/vector-config-macros/src/configurable_component.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ impl TypedComponent {
ComponentType::EnrichmentTable => {
parse_quote! { ::vector_config::component::EnrichmentTableDescription }
}
ComponentType::GlobalOption => {
parse_quote! { ::vector_config::component::GlobalOptionDescription }
}
ComponentType::Provider => {
parse_quote! { ::vector_config::component::ProviderDescription }
}
Expand Down Expand Up @@ -348,6 +351,7 @@ pub fn configurable_component_impl(args: TokenStream, item: TokenStream) -> Toke
fn get_named_component_helper_ident(component_type: ComponentType) -> Ident {
let attr = match component_type {
ComponentType::EnrichmentTable => attrs::ENRICHMENT_TABLE_COMPONENT,
ComponentType::GlobalOption => attrs::GLOBAL_OPTION_COMPONENT,
ComponentType::Provider => attrs::PROVIDER_COMPONENT,
ComponentType::Secrets => attrs::SECRETS_COMPONENT,
ComponentType::Sink => attrs::SINK_COMPONENT,
Expand Down
1 change: 1 addition & 0 deletions lib/vector-config-macros/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ pub fn derive_configurable(input: TokenStream) -> TokenStream {
NamedComponent,
attributes(
enrichment_table_component,
global_option_component,
provider_component,
secrets_component,
sink_component,
Expand Down
5 changes: 5 additions & 0 deletions lib/vector-config/src/component/marker.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
/// An enrichment table component.
pub struct EnrichmentTableComponent;

/// A global option component.
pub struct GlobalOptionComponent;

/// A provider component.
pub struct ProviderComponent;

Expand All @@ -20,6 +23,7 @@ pub struct TransformComponent;
pub trait ComponentMarker: sealed::Sealed {}

impl ComponentMarker for EnrichmentTableComponent {}
impl ComponentMarker for GlobalOptionComponent {}
impl ComponentMarker for ProviderComponent {}
impl ComponentMarker for SecretsComponent {}
impl ComponentMarker for SinkComponent {}
Expand All @@ -30,6 +34,7 @@ mod sealed {
pub trait Sealed {}

impl Sealed for super::EnrichmentTableComponent {}
impl Sealed for super::GlobalOptionComponent {}
impl Sealed for super::ProviderComponent {}
impl Sealed for super::SecretsComponent {}
impl Sealed for super::SinkComponent {}
Expand Down
6 changes: 4 additions & 2 deletions lib/vector-config/src/component/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ mod marker;
pub use self::description::{ComponentDescription, ExampleError};
pub use self::generate::GenerateConfig;
pub use self::marker::{
ComponentMarker, EnrichmentTableComponent, ProviderComponent, SecretsComponent, SinkComponent,
SourceComponent, TransformComponent,
ComponentMarker, EnrichmentTableComponent, GlobalOptionComponent, ProviderComponent,
SecretsComponent, SinkComponent, SourceComponent, TransformComponent,
};

// Create some type aliases for the component marker/description types, and collect (register,
Expand All @@ -17,10 +17,12 @@ pub type SecretsDescription = ComponentDescription<SecretsComponent>;
pub type SinkDescription = ComponentDescription<SinkComponent>;
pub type EnrichmentTableDescription = ComponentDescription<EnrichmentTableComponent>;
pub type ProviderDescription = ComponentDescription<ProviderComponent>;
pub type GlobalOptionDescription = ComponentDescription<GlobalOptionComponent>;

inventory::collect!(SourceDescription);
inventory::collect!(TransformDescription);
inventory::collect!(SecretsDescription);
inventory::collect!(SinkDescription);
inventory::collect!(EnrichmentTableDescription);
inventory::collect!(ProviderDescription);
inventory::collect!(GlobalOptionDescription);
60 changes: 45 additions & 15 deletions scripts/generate-component-docs.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1659,7 +1659,7 @@ def get_rendered_description_from_schema(schema)
description.strip
end

def render_and_import_schema(root_schema, schema_name, friendly_name, config_map_path, cue_relative_path)
def unwrap_resolved_schema(root_schema, schema_name, friendly_name)
@logger.info "[*] Resolving schema definition for #{friendly_name}..."

# Try and resolve the schema, unwrapping it as an object schema which is a requirement/expectation
Expand All @@ -1673,7 +1673,10 @@ def render_and_import_schema(root_schema, schema_name, friendly_name, config_map
exit 1
end

unwrapped_resolved_schema = sort_hash_nested(unwrapped_resolved_schema)
return sort_hash_nested(unwrapped_resolved_schema)
end

def render_and_import_schema(unwrapped_resolved_schema, friendly_name, config_map_path, cue_relative_path)

# Set up the appropriate structure for the value based on the configuration map path. It defines
# the nested levels of the map where our resolved schema should go, as well as a means to generate
Expand All @@ -1691,16 +1694,15 @@ def render_and_import_schema(root_schema, schema_name, friendly_name, config_map
config_map_path.prepend('config-schema-base')
tmp_file_prefix = config_map_path.join('-')

final = { 'base' => { 'components' => data } }
final_json = to_pretty_json(final)
final_json = to_pretty_json(data)

# Write the resolved schema as JSON, which we'll then use to import into a Cue file.
json_output_file = write_to_temp_file(["config-schema-#{tmp_file_prefix}-", '.json'], final_json)
@logger.info "[✓] Wrote #{friendly_name} schema to '#{json_output_file}'. (#{final_json.length} bytes)"

# Try importing it as Cue.
@logger.info "[*] Importing #{friendly_name} schema as Cue file..."
cue_output_file = "website/cue/reference/components/#{cue_relative_path}"
cue_output_file = "website/cue/reference/#{cue_relative_path}"
unless system(@cue_binary_path, 'import', '-f', '-o', cue_output_file, '-p', 'metadata', json_output_file)
@logger.error "[!] Failed to import #{friendly_name} schema as valid Cue."
exit 1
Expand All @@ -1709,22 +1711,40 @@ def render_and_import_schema(root_schema, schema_name, friendly_name, config_map
end

def render_and_import_base_component_schema(root_schema, schema_name, component_type)
friendly_name = "base #{component_type} configuration"
unwrapped_resolved_schema = unwrap_resolved_schema(root_schema, schema_name, friendly_name)
render_and_import_schema(
root_schema,
schema_name,
"base #{component_type} configuration",
["#{component_type}s"],
"base/#{component_type}s.cue"
unwrapped_resolved_schema,
friendly_name,
["base", "components", "#{component_type}s"],
"components/base/#{component_type}s.cue"
)
end

# Resolves the schema named `schema_name` for one specific component and hands
# it to `render_and_import_schema`, nested under
# base -> components -> "<component_type>s" -> <component_name>.
def render_and_import_component_schema(root_schema, schema_name, component_type, component_name)
  label = "'#{component_name}' #{component_type} configuration"
  resolved = unwrap_resolved_schema(root_schema, schema_name, label)

  # Path of nested map keys under which the resolved schema is placed.
  map_path = ["base", "components", "#{component_type}s", component_name]
  # Cue output path, relative to the reference directory used by the importer.
  cue_path = "components/#{component_type}s/base/#{component_name}.cue"

  render_and_import_schema(resolved, label, map_path, cue_path)
end

def render_and_import_base_global_option_schema(root_schema, global_options)
global_option_schema = {}
global_options.each do |component_name, schema_name|
# Global option schemas do not need to be unwrapped, so we call `resolve_schema_by_name` directly.
resolved_schema = resolve_schema_by_name(root_schema, schema_name)
global_option_schema[component_name] = resolved_schema
end

render_and_import_schema(
root_schema,
schema_name,
"'#{component_name}' #{component_type} configuration",
["#{component_type}s", component_name],
"#{component_type}s/base/#{component_name}.cue"
global_option_schema,
"configuration",
["base", "configuration"],
"base/configuration.cue"
)
end

Expand Down Expand Up @@ -1773,3 +1793,13 @@ def render_and_import_component_schema(root_schema, schema_name, component_type,
render_and_import_component_schema(root_schema, schema_name, component_type, component_name)
end
end

# Finally, generate the global options configuration.
# Collect every schema definition tagged as a global option into a single
# `{ component_name => definition_key }` map.
#
# The merge is seeded with an empty hash: `reduce` without an initial value
# returns nil for an empty list, which would make the later
# `global_options.each` call raise instead of simply iterating over nothing.
global_options = root_schema['definitions'].filter_map do |key, definition|
  component_type = get_schema_metadata(definition, 'docs::component_type')
  component_name = get_schema_metadata(definition, 'docs::component_name')
  { component_name => key } if component_type == "global_option"
end
.reduce({}) { |acc, item| nested_merge(acc, item) }

render_and_import_base_global_option_schema(root_schema, global_options)
8 changes: 6 additions & 2 deletions src/enrichment_tables/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use crate::config::EnrichmentTableConfig;
#[configurable_component]
#[derive(Clone, Debug, Eq, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
#[configurable(metadata(docs::enum_tag_description = "File encoding type."))]
pub enum Encoding {
/// Decodes the file as a [CSV][csv] (comma-separated values) file.
///
Expand Down Expand Up @@ -76,7 +77,7 @@ pub struct FileConfig {
/// 1. One of the built-in-formats listed in the `Timestamp Formats` table below.
/// 2. The [time format specifiers][chrono_fmt] from Rust’s `chrono` library.
///
/// ### Types
/// Types
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed the header here since it would be displayed on the right side

image

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting, yeah it might be better without that header.

///
/// - **`bool`**
/// - **`string`**
Expand All @@ -85,7 +86,7 @@ pub struct FileConfig {
/// - **`date`**
/// - **`timestamp`** (see the table below for formats)
///
/// ### Timestamp Formats
/// Timestamp Formats
///
/// | Format | Description | Example |
/// |----------------------|----------------------------------------------------------------------------------|----------------------------------|
Expand All @@ -111,6 +112,9 @@ pub struct FileConfig {
/// [rfc3339]: https://tools.ietf.org/html/rfc3339
/// [chrono_fmt]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html#specifiers
#[serde(default)]
#[configurable(metadata(
docs::additional_props_description = "Represents mapped log field names and types."
))]
pub schema: HashMap<String, String>,
}

Expand Down
47 changes: 29 additions & 18 deletions src/enrichment_tables/mod.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
//! Functionality to handle enrichment tables.
use crate::sinks::prelude::SinkConfig;
use enum_dispatch::enum_dispatch;
use vector_lib::configurable::{configurable_component, NamedComponent};
use vector_lib::configurable::configurable_component;
pub use vector_lib::enrichment::{Condition, IndexHandle, Table};

use crate::config::{EnrichmentTableConfig, GlobalOptions};
use crate::config::{EnrichmentTableConfig, GenerateConfig, GlobalOptions};

pub mod file;

Expand All @@ -17,11 +17,26 @@ pub mod geoip;
#[cfg(feature = "enrichment-tables-mmdb")]
pub mod mmdb;

/// Configurable enrichment tables.
#[configurable_component]
/// Configuration options for an [enrichment table](https://vector.dev/docs/reference/glossary/#enrichment-tables) to be used in a
/// [`remap`](https://vector.dev/docs/reference/configuration/transforms/remap/) transform. Currently supported are:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed that there are some link errors in the generated CUE when we reference a pre-defined URL such as urls.maxmind. I decided to put the original links on the Rust side since they will be rendered in both the Vector docs and the Rust docs.

///
/// * [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) files
/// * [MaxMind](https://www.maxmind.com/en/home) databases
/// * In-memory storage
///
/// For the lookup in the enrichment tables to be as performant as possible, the data is indexed according
/// to the fields that are used in the search. Note that indices can only be created for fields for which an
/// exact match is used in the condition. For range searches, an index isn't used and the enrichment table
/// drops back to a sequential scan of the data. A sequential scan shouldn't impact performance
/// significantly provided that there are only a few possible rows returned by the exact matches in the
/// condition. We don't recommend using a condition that uses only date range searches.
///
///
#[configurable_component(global_option("enrichment_tables"))]
#[derive(Clone, Debug)]
#[serde(tag = "type", rename_all = "snake_case")]
#[enum_dispatch(EnrichmentTableConfig)]
#[configurable(metadata(docs::enum_tag_description = "enrichment table type"))]
pub enum EnrichmentTables {
/// Exposes data from a static file as an enrichment table.
File(file::FileConfig),
Expand All @@ -45,19 +60,15 @@ pub enum EnrichmentTables {
Mmdb(mmdb::MmdbConfig),
}

// TODO: Use `enum_dispatch` here.
impl NamedComponent for EnrichmentTables {
fn get_component_name(&self) -> &'static str {
match self {
Self::File(config) => config.get_component_name(),
#[cfg(feature = "enrichment-tables-memory")]
Self::Memory(config) => config.get_component_name(),
#[cfg(feature = "enrichment-tables-geoip")]
Self::Geoip(config) => config.get_component_name(),
#[cfg(feature = "enrichment-tables-mmdb")]
Self::Mmdb(config) => config.get_component_name(),
#[allow(unreachable_patterns)]
_ => unimplemented!(),
}
impl GenerateConfig for EnrichmentTables {
    /// Builds an example enrichment table configuration as a TOML value.
    ///
    /// The example is a `File` table pointing at the placeholder path
    /// `path/to/file`, using the default encoding and a default schema.
    ///
    /// # Panics
    ///
    /// Panics if the example configuration cannot be converted to TOML.
    fn generate_config() -> toml::Value {
        let settings = file::FileSettings {
            path: "path/to/file".into(),
            encoding: file::Encoding::default(),
        };
        let example = Self::File(file::FileConfig {
            file: settings,
            schema: Default::default(),
        });

        toml::Value::try_from(example).unwrap()
    }
}
Loading
Loading