diff --git a/docs/flow_configuration_reference.md b/docs/flow_configuration_reference.md
new file mode 100644
index 0000000..d1abb2a
--- /dev/null
+++ b/docs/flow_configuration_reference.md
@@ -0,0 +1,48 @@
+When calling the module, you will need to specify a `flows_configuration`. This page documents that structure.
+
+```hcl
+module "airbyte_flows" {
+  source  = "artefactory/airbyte-flows/google"
+  version = "~> 0"
+
+  project_id                    = local.project_id
+  airbyte_service_account_email = local.airbyte_service_account
+
+  flows_configuration = {} # <-- This right here
+}
+```
+
+## Full specification
+
+```hcl
+map(object({
+  flow_name   = string # Display name for your data flow
+  source_name = string # Name of the source. Either one from https://docs.airbyte.com/category/sources or a custom one.
+
+  custom_source = optional(object({ # Default: null. If source_name is not in the Airbyte sources catalog, specify where to find it
+    docker_repository = string # Docker repository URL (e.g. 112233445566.dkr.ecr.us-east-1.amazonaws.com/source-custom) or DockerHub identifier (e.g. airbyte/source-postgres)
+    docker_image_tag  = string # Docker image tag
+    documentation_url = string # Custom source documentation URL
+  }))
+
+  cron_schedule = optional(string, "manual") # Default: manual. Cron expression for when syncs should run (e.g. "0 0 12 * * ?" => sync at 12:00 PM every day)
+  cron_timezone = optional(string, "UTC")    # Default: UTC. One of the TZ identifiers at https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
+
+  normalize = optional(bool, true) # Default: true. Whether Airbyte should normalize the data after ingestion. https://docs.airbyte.com/understanding-airbyte/basic-normalization/
+
+  tables_to_sync = map(object({ # All streams to extract from the source and load to BigQuery
+    sync_mode             = optional(string, "full_refresh") # Allowed: full_refresh | incremental. Default: full_refresh
+    destination_sync_mode = optional(string, "append")       # Allowed: append | overwrite | append_dedup. Default: append
+    cursor_field          = optional(string) # Path to the field used to determine whether a record is new or modified since the last sync. REQUIRED if sync_mode is incremental; otherwise ignored.
+    primary_key           = optional(string) # Field(s) to use as the primary key (several can be listed for a composite PK). REQUIRED if destination_sync_mode is *_dedup; otherwise ignored.
+  }))
+
+  source_specification = map(string) # Source-specific configurations. Refer to the connectors catalog for more info. For any string like "secret:<secret_name>", the module will fetch the value of `secret_name` in the Secret Manager.
+
+  destination_specification = object({
+    dataset_name        = string # Existing dataset to which your data will be written
+    dataset_location    = string # Allowed: EU | US | any valid BigQuery region listed at https://cloud.google.com/bigquery/docs/locations
+    staging_bucket_name = string # Existing bucket to which your data will be written as Avro files at each connection run
+  })
+}))
+```
diff --git a/docs/index.md b/docs/index.md
index 9d27645..1b0ce20 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,9 +1,3 @@
----
-hide:
-  - navigation
-  - toc
----
-
 This module will help you programmatically deploy end-to-end ELT flows to BigQuery on Airbyte.
 
 It supports custom sources and integrates with the secret manager to securely store sensitive configurations. It also allows you to specify flows as YAML.
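To make the new reference page concrete, a filled-in `flows_configuration` could look like the sketch below. It is illustrative only: the flow key, the connector fields (`host`, `port`, `database`, `username`, `password`), the dataset, bucket, and secret names are all hypothetical, and the keys accepted in `source_specification` depend entirely on the chosen connector's specification.

```hcl
module "airbyte_flows" {
  source  = "artefactory/airbyte-flows/google"
  version = "~> 0"

  project_id                    = local.project_id
  airbyte_service_account_email = local.airbyte_service_account

  flows_configuration = {
    # Hypothetical flow syncing a PostgreSQL database; all values below are
    # placeholders and must match your own connector's specification.
    postgres_to_bq = {
      flow_name   = "Postgres to BigQuery"
      source_name = "Postgres"

      cron_schedule = "0 0 3 * * ?" # every day at 03:00
      cron_timezone = "Europe/Paris"

      normalize = true

      tables_to_sync = {
        users = {
          sync_mode             = "incremental"
          destination_sync_mode = "append_dedup"
          cursor_field          = "updated_at"
          primary_key           = "id"
        }
      }

      source_specification = {
        host     = "db.example.com"
        port     = "5432"
        database = "production"
        username = "airbyte"
        password = "secret:postgres_airbyte_password" # resolved from Secret Manager
      }

      destination_specification = {
        dataset_name        = "raw_postgres"
        dataset_location    = "EU"
        staging_bucket_name = "my-airbyte-staging-bucket"
      }
    }
  }
}
```

Note how the `password` value follows the `secret:<secret_name>` convention documented in the specification, so the actual credential never appears in the Terraform code.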
diff --git a/examples/yaml/README.md b/examples/yaml/README.md
index fca45db..c2c28dd 100644
--- a/examples/yaml/README.md
+++ b/examples/yaml/README.md
@@ -1,44 +1,3 @@
-### YAML Configuration reference:
-```yaml
-<flow_id>:
-  flow_name: string # Display name for your data flow
-  source_name: string # Name of the source. Either one from https://docs.airbyte.com/category/sources or a custom one.
-
-  custom_source: # Default: null. If source_name is not in the Airbyte sources catalog, you need to specify where to find it
-    docker_repository: string # Docker repository URL (e.g. 112233445566.dkr.ecr.us-east-1.amazonaws.com/source-custom) or DockerHub identifier (e.g. airbyte/source-postgres)
-    docker_image_tag: string # Docker image tag
-    documentation_url: string # Custom source documentation URL
-
-  cron_schedule: string # Default: manual. Cron expression for when syncs should run (e.g. "0 0 12 * * ?" => sync at 12:00 PM every day)
-  cron_timezone: string # Default: UTC. One of the TZ identifiers at https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
-
-  normalize: bool # Default: true. Whether Airbyte should normalize the data after ingestion. https://docs.airbyte.com/understanding-airbyte/basic-normalization/
-
-  tables_to_sync: # All streams to extract from the source and load to BigQuery
-    <table_name>:
-      sync_mode: string # Allowed: full_refresh | incremental. Default: full_refresh
-      destination_sync_mode: string # Allowed: append | overwrite | append_dedup. Default: append
-      cursor_field: string # Path to the field used to determine whether a record is new or modified since the last sync. REQUIRED if sync_mode is incremental; otherwise ignored.
-      primary_key: # Fields used as the primary key (several can be listed for a composite PK). REQUIRED if destination_sync_mode is *_dedup; otherwise ignored.
-        - string
-        - string
-
-    <table_name>:
-      ...
-
-  source_specification: # Source-specific configurations
-    <key>: any # The types are defined by the source connector
-    <key>: any # For any string like "secret:<secret_name>", the module will fetch the value of `secret_name` in the Secret Manager.
-
-  destination_specification:
-    dataset_name: string # Existing dataset to which your data will be written
-    dataset_location: string # Allowed: EU | US | any valid BigQuery region listed at https://cloud.google.com/bigquery/docs/locations
-    staging_bucket_name: string # Existing bucket to which your data will be written as Avro files at each connection run
-
-<flow_id>:
-  ...
-```
-
 ## Requirements
 
 | Name | Version |
diff --git a/examples/yaml/main.tf b/examples/yaml/main.tf
deleted file mode 100644
index 774ca50..0000000
--- a/examples/yaml/main.tf
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
-* ### YAML Configuration reference:
-* ```yaml
-* <flow_id>:
-*   flow_name: string # Display name for your data flow
-*   source_name: string # Name of the source. Either one from https://docs.airbyte.com/category/sources or a custom one.
-*
-*   custom_source: # Default: null. If source_name is not in the Airbyte sources catalog, you need to specify where to find it
-*     docker_repository: string # Docker repository URL (e.g. 112233445566.dkr.ecr.us-east-1.amazonaws.com/source-custom) or DockerHub identifier (e.g. airbyte/source-postgres)
-*     docker_image_tag: string # Docker image tag
-*     documentation_url: string # Custom source documentation URL
-*
-*   cron_schedule: string # Default: manual. Cron expression for when syncs should run (e.g. "0 0 12 * * ?" => sync at 12:00 PM every day)
-*   cron_timezone: string # Default: UTC. One of the TZ identifiers at https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
-*
-*   normalize: bool # Default: true. Whether Airbyte should normalize the data after ingestion. https://docs.airbyte.com/understanding-airbyte/basic-normalization/
-*
-*   tables_to_sync: # All streams to extract from the source and load to BigQuery
-*     <table_name>:
-*       sync_mode: string # Allowed: full_refresh | incremental. Default: full_refresh
-*       destination_sync_mode: string # Allowed: append | overwrite | append_dedup. Default: append
-*       cursor_field: string # Path to the field used to determine whether a record is new or modified since the last sync. REQUIRED if sync_mode is incremental; otherwise ignored.
-*       primary_key: # Fields used as the primary key (several can be listed for a composite PK). REQUIRED if destination_sync_mode is *_dedup; otherwise ignored.
-*       - string
-*       - string
-*
-*     <table_name>:
-*       ...
-*
-*   source_specification: # Source-specific configurations
-*     <key>: any # The types are defined by the source connector
-*     <key>: any # For any string like "secret:<secret_name>", the module will fetch the value of `secret_name` in the Secret Manager.
-*
-*   destination_specification:
-*     dataset_name: string # Existing dataset to which your data will be written
-*     dataset_location: string # Allowed: EU | US | any valid BigQuery region listed at https://cloud.google.com/bigquery/docs/locations
-*     staging_bucket_name: string # Existing bucket to which your data will be written as Avro files at each connection run
-*
-* <flow_id>:
-*   ...
-* ```
-*/
diff --git a/mkdocs.yaml b/mkdocs.yaml
index 98b1292..acb4af1 100644
--- a/mkdocs.yaml
+++ b/mkdocs.yaml
@@ -6,4 +6,5 @@ plugins:
   - techdocs-core
 
 nav:
-  - Home: index.md
\ No newline at end of file
+  - Home: index.md
+  - flow_configuration reference: flow_configuration_reference.md
\ No newline at end of file
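With the YAML reference now living in `docs/flow_configuration_reference.md` in HCL form, it is worth keeping in mind how a YAML flow file typically reaches the module. The sketch below is an assumption rather than the project's actual example code: the file name `flows.yaml` and the local names are hypothetical, and it relies only on Terraform's built-in `file` and `yamldecode` functions together with the structure documented above.

```hcl
locals {
  # Hypothetical YAML file whose top-level keys are flow IDs, each entry
  # following the structure documented in docs/flow_configuration_reference.md.
  flows = yamldecode(file("${path.module}/flows.yaml"))
}

module "airbyte_flows" {
  source  = "artefactory/airbyte-flows/google"
  version = "~> 0"

  project_id                    = local.project_id
  airbyte_service_account_email = local.airbyte_service_account

  # Terraform converts the decoded YAML to the module's expected object type,
  # provided every required attribute is present.
  flows_configuration = local.flows
}
```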