Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: AWS Asset Inventory Transformation #327

Merged
merged 13 commits into from
Dec 5, 2023
1 change: 1 addition & 0 deletions transformations/aws/asset-inventory-free/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# CloudQuery × dbt: AWS Asset Inventory Package
Empty file.
39 changes: 39 additions & 0 deletions transformations/aws/asset-inventory-free/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@

# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'aws_asset_inventory'
version: '1.0.0'
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: 'aws_asset_inventory'

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
# NOTE(review): the parent-relative entries ("../models", "../macros",
# "../../macros") pull in models/macros shared with sibling transformation
# packages — confirm the repository layout before relocating this file.
model-paths: ["models", "../models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["../macros", "../../macros"]
snapshot-paths: ["snapshots"]

# Directories removed by `dbt clean`.
clean-targets:
  - "target"
  - "dbt_packages"

# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models

# In this example config, we tell dbt to build all models in the example/
# directory as views. These settings can be overridden in the individual model
# files using the `{{ config(...) }}` macro.
# NOTE(review): `models:` has no value (all children are commented out), which
# YAML parses as null; dbt tolerates this, but consider removing the key until
# a model config is actually needed.
models:
# Config indicated by + and applies to all files under models/example/
# example:
#   +materialized: view




13 changes: 13 additions & 0 deletions transformations/aws/asset-inventory-free/manifest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"schema_version": 1,
"type": "addon",
"team_name": "cloudquery",
"addon_name": "aws-asset-inventory",
"addon_type": "transformation",
"addon_format": "zip",
"message": "@./changelog.md",
"doc": "./README.md",
"path": "./build/aws_asset-inventory.zip",
"plugin_deps": ["cloudquery/source/[email protected]"],
"addon_deps": []
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
--Add Intersect pg_tables to ignore views.
-- Collect every CloudQuery aws_* base table that exposes an account column
-- and an arn column; these are the tables we can normalize into the asset
-- inventory shape.
{% set aws_tables %}
SELECT tablename AS table_name
FROM pg_tables
INTERSECT
SELECT DISTINCT table_name
FROM information_schema.columns
-- 'aws\_%' (escaped underscore), not 'aws_%s': the trailing "%s" was a
-- printf-placeholder leftover that silently excluded every aws_* table whose
-- name does not end in the letter 's'.
WHERE table_name LIKE 'aws\_%' AND column_name IN ('account_id', 'request_account_id')
INTERSECT
SELECT table_name
FROM information_schema.columns
WHERE table_name LIKE 'aws\_%' AND column_name = 'arn';
{% endset %}

-- Generate one SELECT per matching table, glued together with UNION ALL.
-- run_query only returns rows at execution time; guard with `execute` so
-- parse-time rendering (execute == False) does not iterate over None.
{% if execute %}
{% for row in run_query(aws_tables) %}
{% if row.table_name is not none and row.table_name != '' %}
{{ aws_asset_resources(row.table_name) }}
{% if not loop.last %} UNION ALL {% endif %}
{% endif %}
{% endfor %}
{% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
dbt-postgres==1.7.3
Empty file.
Empty file.
Empty file.
17 changes: 17 additions & 0 deletions transformations/aws/asset-inventory-free/tests/postgres.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# CloudQuery sync spec used by the transformation tests:
# AWS source plugin -> PostgreSQL destination.
kind: source
spec:
  name: aws
  path: cloudquery/aws
  version: "v22.19.0" # latest version of source aws plugin
  destinations: ["postgresql"]
  tables: ["*"]
---
kind: destination
spec:
  name: "postgresql"
  path: "cloudquery/postgresql"
  registry: cloudquery
  version: "v7.1.0" # latest version of postgresql plugin
  spec:
    batch_size: 10000
    connection_string: ${CQ_DSN}
12 changes: 12 additions & 0 deletions transformations/aws/asset-inventory-free/tests/profiles.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
aws_asset_inventory: # This should match the name in your dbt_project.yml
  # `target` must name one of the keys under `outputs`; the output was
  # previously named `dev-pg`, which made `dbt run` fail with
  # "target 'dev' not found". Renamed to `dev` to match.
  target: dev
  outputs:
    dev:
      type: postgres
      host: 127.0.0.1
      user: postgres
      pass: pass
      port: 5432
      dbname: postgres
      schema: public # default schema where dbt will build the models
      threads: 1 # number of threads to use when running in parallel
125 changes: 125 additions & 0 deletions transformations/aws/macros/aws_asset_resources.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
{% macro aws_asset_resources(table_name) %}
{#
    Renders one SELECT that maps a CloudQuery `aws_*` table onto the shared
    asset-inventory shape: _cq_id, _cq_source_name, _cq_sync_time, account_id,
    request_account_id, type, arn, region, tags, partition, service,
    _cq_table. Optional columns are probed through information_schema; when a
    column is missing, its value is derived from the ARN or replaced with a
    sentinel default.
#}

--Determine if Columns Exist for Table
--`account_id`
{% set account_id_exists_query %}
SELECT column_name
FROM information_schema.columns
WHERE table_name = '{{ table_name }}'
AND column_name = 'account_id'
{% endset %}

--`request_account_id`
{% set request_account_id_exists_query %}
SELECT column_name
FROM information_schema.columns
WHERE table_name = '{{ table_name }}'
AND column_name = 'request_account_id'
{% endset %}

--region
{% set region_exists_query %}
SELECT column_name
FROM information_schema.columns
WHERE table_name = '{{ table_name }}'
AND column_name = 'region'
{% endset %}

--tags
{% set tags_exists_query %}
SELECT column_name
FROM information_schema.columns
WHERE table_name = '{{ table_name }}'
AND column_name = 'tags'
{% endset %}

{# Dead code kept for reference, but moved from a SQL comment into a Jinja
   comment: Jinja tags inside SQL comments still execute at render time, so
   the old form ran three extra run_query probes per table for output that was
   commented out anyway.

This block was used when other views were evaluated.
{% set cq_id_exists_query %}
SELECT column_name
FROM information_schema.columns
WHERE table_name = '{{ table_name }}'
AND column_name = '_cq_id'
{% endset %}

{% set cq_source_name_exists_query %}
SELECT column_name
FROM information_schema.columns
WHERE table_name = '{{ table_name }}'
AND column_name = '_cq_source_name'
{% endset %}

{% set cq_sync_time_exists_query %}
SELECT column_name
FROM information_schema.columns
WHERE table_name = '{{ table_name }}'
AND column_name = '_cq_sync_time'
{% endset %}

TODO: Not sure why cq_id, cq_source_name, cq_sync_time aren't found in tables.
For now, putting in placeholders when those columns aren't found. The previous
implementation had SELECT _cq_id, _cq_source_name, _cq_sync_time without issues.

{% if run_query(cq_id_exists_query).rows %}
_cq_id
{% else %}
'11111111-1111-1111-1111-111111111111'
{% endif %} AS _cq_id,

{% if run_query(cq_source_name_exists_query).rows %}
_cq_source_name
{% else %}
'Unknown'
{% endif %} AS _cq_source_name,

{% if run_query(cq_sync_time_exists_query).rows %}
_cq_sync_time
{% else %}
'2000-01-01 00:00:00.000000'
{% endif %} AS _cq_sync_time,
#}

SELECT
_cq_id, _cq_source_name, _cq_sync_time,

-- COALESCE() around a single argument was a no-op; emit the chosen expression
-- directly. ARN field 5 is the account id:
--   arn:partition:service:region:account-id:resource
{% if run_query(account_id_exists_query).rows %}
account_id
{% else %}
SPLIT_PART(arn, ':', 5)
{% endif %} AS account_id,

{% if run_query(request_account_id_exists_query).rows %}
request_account_id
{% else %}
SPLIT_PART(arn, ':', 5)
{% endif %} AS request_account_id,

-- Resource type: the first '/'-segment of ARN field 6; NULL when the ARN has
-- no resource path at all (no '/' in field 6 and no seventh ':' field).
CASE
WHEN SPLIT_PART(SPLIT_PART(arn, ':', 6), '/', 2) = '' AND SPLIT_PART(arn, ':', 7) = '' THEN NULL
ELSE SPLIT_PART(SPLIT_PART(arn, ':', 6), '/', 1)
END AS type,
arn,

--TODO: Fix for some resources that may have regions (WAF Rule Group, aws_ec2_managed_prefix_lists)
{% if run_query(region_exists_query).rows %}
region
{% else %}
'unavailable'
{% endif %} AS region,

{% if run_query(tags_exists_query).rows %}
tags
{% else %}
'{}'::jsonb
{% endif %} AS tags,

SPLIT_PART(arn, ':', 2) AS partition,
SPLIT_PART(arn, ':', 3) AS service,

'{{ table_name | string }}' AS _cq_table
FROM {{ table_name | string }}

{% endmacro %}
Loading