diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
new file mode 100644
index 0000000..40a0861
--- /dev/null
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,48 @@
+# Runs sqlfluff through diff-quality on every push to the `diff-quality` branch.
+name: CI Diff-quality
+
+on:
+  push:
+    branches:
+      - diff-quality
+env:
+  DBT_DEFAULT_PROFILE_TARGET: stellar_dbt_public
+  DBT_PROFILES_DIR: ${{ github.workspace }}
+  DBT_TARGET: prod
+  DBT_DATASET: crypto_stellar
+  DBT_MAX_BYTES_BILLED: 1000000000000
+  DBT_JOB_TIMEOUT: 300
+  DBT_THREADS: 1
+  DBT_JOB_RETRIES: 1
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0 # Fetch all history
+
+      - name: Set up Python
+        uses: actions/setup-python@v3
+        with:
+          python-version: "3.8"
+
+      - name: Install dependencies
+        run: |
+          pip install -r requirements.txt
+          dbt deps
+
+      - name: Authenticate to GCP
+        uses: 'google-github-actions/auth@v2'
+        with:
+          credentials_json: "${{ secrets.CREDS_TEST_HUBBLE }}"
+
+      - name: Checkout source branch and diff quality
+        run: |
+          # github.head_ref is only set for pull_request events and is empty on
+          # push, so use the runner-provided GITHUB_REF_NAME (the pushed branch).
+          git checkout "${GITHUB_REF_NAME}"
+          diff-quality --violations=sqlfluff --fail-under=80
diff --git a/.sqlfluff b/.sqlfluff
index cde1416..c02e8f9 100644
--- a/.sqlfluff
+++ b/.sqlfluff
@@ -80,7 +80,7 @@ project_dir = ./
 # If your project works only with .env files you need to:
 ### Change ~/.dbt/ for ./ on profiles_dir variable here
 ### Remove steps on the pipeline that create and copy the profiles.yml to ~/.dbt/ folder.
-profiles_dir = ~/.dbt/
+profiles_dir = ./
 # Name of the profiles.
 # CHANGE THIS TO THE PROFILE NAME ON YOUR PROFILES.YML FILE
 profile = stellar_dbt
diff --git a/models/docs/universal.md b/models/docs/universal.md
index e72ab18..e3a7262 100644
--- a/models/docs/universal.md
+++ b/models/docs/universal.md
@@ -14,6 +14,14 @@ The Farm Hash encoding of Asset Code + Asset Issuer + Asset Type.
This field is {% enddocs %} +{% docs unique_id %} +Current snapshot tables (tables that end in `*_current`) require a single, unique identifier so that only records that change are updated. This column is a concatenation of the natural keys to create a unique key. +{% enddocs %} + +{% docs upstream_insert_ts %} +The timestamp in UTC when a batch of records was inserted into an upstream table. This field can help identify gaps in data as well as improve rerun capabilities. The timestamp is intended for data engineering purposes and should not be used for ad hoc analysis. +{% enddocs %} + + {% docs asset_code %} The 4 or 12 character code representation of the asset on the network.