Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: AWS Asset Inventory Transformation #327

Merged
merged 13 commits into from
Dec 5, 2023
1 change: 1 addition & 0 deletions transformations/aws/asset-inventory-free/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# CloudQuery × dbt: AWS Asset Inventory Package
Empty file.
39 changes: 39 additions & 0 deletions transformations/aws/asset-inventory-free/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@

# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'aws_asset_inventory'
version: '1.0.0'
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: 'aws_asset_inventory'

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
# NOTE(review): the parent-relative entries ("../models", "../macros",
# "../../macros") pull in models/macros shared with sibling transformation
# packages — confirm the repository layout before relocating this file.
model-paths: ["models", "../models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["../macros", "../../macros"]
snapshot-paths: ["snapshots"]

# Directories removed by `dbt clean`.
clean-targets:
  - "target"
  - "dbt_packages"

# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models

# In this example config, we tell dbt to build all models in the example/
# directory as views. These settings can be overridden in the individual model
# files using the `{{ config(...) }}` macro.
# NOTE(review): `models:` has no value (all children are commented out), which
# YAML parses as null; dbt tolerates this, but consider removing the key until
# a model config is actually needed.
models:
# Config indicated by + and applies to all files under models/example/
# example:
#   +materialized: view




13 changes: 13 additions & 0 deletions transformations/aws/asset-inventory-free/manifest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"schema_version": 1,
"type": "addon",
"team_name": "cloudquery",
"addon_name": "aws-asset-inventory",
"addon_type": "transformation",
"addon_format": "zip",
"message": "@./changelog.md",
"doc": "./README.md",
"path": "./build/aws_asset-inventory.zip",
"plugin_deps": ["cloudquery/source/[email protected]"],
"addon_deps": []
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
--Add Intersect pg_tables to ignore views.
-- Collect every CloudQuery aws_* base table that exposes an account column
-- and an arn column; these are the tables we can normalize into the asset
-- inventory shape.
{% set aws_tables %}
SELECT tablename AS table_name
FROM pg_tables
INTERSECT
SELECT DISTINCT table_name
FROM information_schema.columns
-- 'aws\_%' (escaped underscore), not 'aws_%s': the trailing "%s" was a
-- printf-placeholder leftover that silently excluded every aws_* table whose
-- name does not end in the letter 's'.
WHERE table_name LIKE 'aws\_%' AND column_name IN ('account_id', 'request_account_id')
INTERSECT
SELECT table_name
FROM information_schema.columns
WHERE table_name LIKE 'aws\_%' AND column_name = 'arn';
{% endset %}

-- Generate one SELECT per matching table, glued together with UNION ALL.
-- run_query only returns rows at execution time; guard with `execute` so
-- parse-time rendering (execute == False) does not iterate over None.
{% if execute %}
{% for row in run_query(aws_tables) %}
{% if row.table_name is not none and row.table_name != '' %}
{{ aws_asset_resources(row.table_name) }}
{% if not loop.last %} UNION ALL {% endif %}
{% endif %}
{% endfor %}
{% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
dbt-postgres==1.7.3
Empty file.
Empty file.
Empty file.
17 changes: 17 additions & 0 deletions transformations/aws/asset-inventory-free/tests/postgres.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# CloudQuery sync spec used by the transformation tests:
# AWS source plugin -> PostgreSQL destination.
kind: source
spec:
  name: aws
  path: cloudquery/aws
  version: "v22.19.0" # latest version of source aws plugin
  destinations: ["postgresql"]
  tables: ["*"]
---
kind: destination
spec:
  name: "postgresql"
  path: "cloudquery/postgresql"
  registry: cloudquery
  version: "v7.1.0" # latest version of postgresql plugin
  spec:
    batch_size: 10000
    connection_string: ${CQ_DSN}
12 changes: 12 additions & 0 deletions transformations/aws/asset-inventory-free/tests/profiles.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
aws_asset_inventory: # This should match the name in your dbt_project.yml
  # `target` must name one of the keys under `outputs`; the output was
  # previously named `dev-pg`, which made `dbt run` fail with
  # "target 'dev' not found". Renamed to `dev` to match.
  target: dev
  outputs:
    dev:
      type: postgres
      host: 127.0.0.1
      user: postgres
      pass: pass
      port: 5432
      dbname: postgres
      schema: public # default schema where dbt will build the models
      threads: 1 # number of threads to use when running in parallel
125 changes: 125 additions & 0 deletions transformations/aws/macros/aws_asset_resources.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
{% macro aws_asset_resources(table_name) %}
{#
    Renders one SELECT that maps a CloudQuery `aws_*` table onto the shared
    asset-inventory shape: _cq_id, _cq_source_name, _cq_sync_time, account_id,
    request_account_id, type, arn, region, tags, partition, service,
    _cq_table. Optional columns are probed through information_schema; when a
    column is missing, its value is derived from the ARN or replaced with a
    sentinel default.
#}

--Determine if Columns Exist for Table
--`account_id`
{% set account_id_exists_query %}
SELECT column_name
FROM information_schema.columns
WHERE table_name = '{{ table_name }}'
AND column_name = 'account_id'
{% endset %}

--`request_account_id`
{% set request_account_id_exists_query %}
SELECT column_name
FROM information_schema.columns
WHERE table_name = '{{ table_name }}'
AND column_name = 'request_account_id'
{% endset %}

--region
{% set region_exists_query %}
SELECT column_name
FROM information_schema.columns
WHERE table_name = '{{ table_name }}'
AND column_name = 'region'
{% endset %}

--tags
{% set tags_exists_query %}
SELECT column_name
FROM information_schema.columns
WHERE table_name = '{{ table_name }}'
AND column_name = 'tags'
{% endset %}

{# Dead code kept for reference, but moved from a SQL comment into a Jinja
   comment: Jinja tags inside SQL comments still execute at render time, so
   the old form ran three extra run_query probes per table for output that was
   commented out anyway.

This block was used when other views were evaluated.
{% set cq_id_exists_query %}
SELECT column_name
FROM information_schema.columns
WHERE table_name = '{{ table_name }}'
AND column_name = '_cq_id'
{% endset %}

{% set cq_source_name_exists_query %}
SELECT column_name
FROM information_schema.columns
WHERE table_name = '{{ table_name }}'
AND column_name = '_cq_source_name'
{% endset %}

{% set cq_sync_time_exists_query %}
SELECT column_name
FROM information_schema.columns
WHERE table_name = '{{ table_name }}'
AND column_name = '_cq_sync_time'
{% endset %}

TODO: Not sure why cq_id, cq_source_name, cq_sync_time aren't found in tables.
For now, putting in placeholders when those columns aren't found. The previous
implementation had SELECT _cq_id, _cq_source_name, _cq_sync_time without issues.

{% if run_query(cq_id_exists_query).rows %}
_cq_id
{% else %}
'11111111-1111-1111-1111-111111111111'
{% endif %} AS _cq_id,

{% if run_query(cq_source_name_exists_query).rows %}
_cq_source_name
{% else %}
'Unknown'
{% endif %} AS _cq_source_name,

{% if run_query(cq_sync_time_exists_query).rows %}
_cq_sync_time
{% else %}
'2000-01-01 00:00:00.000000'
{% endif %} AS _cq_sync_time,
#}

SELECT
_cq_id, _cq_source_name, _cq_sync_time,

-- COALESCE() around a single argument was a no-op; emit the chosen expression
-- directly. ARN field 5 is the account id:
--   arn:partition:service:region:account-id:resource
{% if run_query(account_id_exists_query).rows %}
account_id
{% else %}
SPLIT_PART(arn, ':', 5)
{% endif %} AS account_id,

{% if run_query(request_account_id_exists_query).rows %}
request_account_id
{% else %}
SPLIT_PART(arn, ':', 5)
{% endif %} AS request_account_id,

-- Resource type: the first '/'-segment of ARN field 6; NULL when the ARN has
-- no resource path at all (no '/' in field 6 and no seventh ':' field).
CASE
WHEN SPLIT_PART(SPLIT_PART(arn, ':', 6), '/', 2) = '' AND SPLIT_PART(arn, ':', 7) = '' THEN NULL
ELSE SPLIT_PART(SPLIT_PART(arn, ':', 6), '/', 1)
END AS type,
arn,

--TODO: Fix for some resources that may have regions (WAF Rule Group, aws_ec2_managed_prefix_lists)
{% if run_query(region_exists_query).rows %}
region
{% else %}
'unavailable'
{% endif %} AS region,

{% if run_query(tags_exists_query).rows %}
tags
{% else %}
'{}'::jsonb
{% endif %} AS tags,

SPLIT_PART(arn, ':', 2) AS partition,
SPLIT_PART(arn, ':', 3) AS service,

'{{ table_name | string }}' AS _cq_table
FROM {{ table_name | string }}

{% endmacro %}
Loading