diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000..7ef56f4a
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,17 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for all configuration options:
+# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
+
+version: 2
+updates:
+  - package-ecosystem: "pip"
+    directory: "/" # Location of package manifests
+    schedule:
+      interval: "weekly"
+
+  - package-ecosystem: "github-actions"
+    # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
+    directory: "/"
+    schedule:
+      interval: "weekly"
diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml
new file mode 100644
index 00000000..604e4f8f
--- /dev/null
+++ b/.github/release-drafter.yml
@@ -0,0 +1,49 @@
+name-template: 'v$RESOLVED_VERSION'
+tag-template: 'v$RESOLVED_VERSION'
+categories:
+  - title: 'New Features ✨'
+    labels:
+      - 'feature'
+      - 'enhancement'
+  - title: 'Bug Fixes 🐛'
+    labels:
+      - 'fix'
+      - 'bugfix'
+      - 'bug'
+  - title: 'Under the Hood ⚙️'
+    labels:
+      - 'chore'
+      - 'ci'
+      - 'refactor'
+  - title: 'Documentation 📖'
+    label: 'docs'
+change-template: '- $SUBJECT (#$NUMBER) - **_Thanks, @$AUTHOR_**!'
+change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks.
+version-resolver:
+  major:
+    labels:
+      - 'major'
+  minor:
+    labels:
+      - 'minor'
+  patch:
+    labels:
+      - 'patch'
+  default: patch
+template: |
+  ## Changes
+
+  $CHANGES
+autolabeler:
+  - label: 'chore'
+    title:
+      - '/^chore(\(.*\))?\:/i'
+  - label: 'ci'
+    title:
+      - '/^ci(\(.*\))?\:/i'
+  - label: 'bug'
+    title:
+      - '/^fix(\(.*\))?\:/i'
+  - label: 'enhancement'
+    title:
+      - '/^feat(\(.*\))?/i'
diff --git a/.github/workflows/connector-tests.yml b/.github/workflows/connector-tests.yml
new file mode 100644
index 00000000..23ec0650
--- /dev/null
+++ b/.github/workflows/connector-tests.yml
@@ -0,0 +1,130 @@
+name: Connectors Tests
+
+concurrency:
+  # This is the name of the concurrency group. It is used to prevent concurrent runs of the same workflow.
+  #
+  # - github.head_ref is only defined on PR runs, it makes sure that the concurrency group is unique for pull requests
+  #   ensuring that only one run per pull request is active at a time.
+  #
+  # - github.run_id is defined on all runs, it makes sure that the concurrency group is unique for workflow dispatches.
+  #   This allows us to run multiple workflow dispatches in parallel.
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+on:
+  workflow_dispatch:
+  pull_request:
+    types:
+      - opened
+      - synchronize
+jobs:
+  cdk_changes:
+    name: Get Changes
+    runs-on: ubuntu-latest
+    permissions:
+      statuses: write
+      pull-requests: read
+    steps:
+      - name: Checkout Airbyte
+        if: github.event_name != 'pull_request'
+        uses: actions/checkout@v4
+      - id: changes
+        uses: dorny/paths-filter@v3.0.2
+        with:
+          filters: |
+            src:
+              - 'airbyte_cdk/**'
+              - 'bin/**'
+              - 'poetry.lock'
+              - 'pyproject.toml'
+            file-based:
+              - 'airbyte_cdk/sources/file_based/**'
+            vector-db-based:
+              - 'airbyte_cdk/destinations/vector_db_based/**'
+            sql:
+              - 'airbyte_cdk/sql/**'
+    outputs:
+      # Source code modified:
+      src: ${{ steps.changes.outputs.src }}
+      # Extras modified:
+      file-based: ${{ steps.changes.outputs.file-based }}
+      vector-db-based: ${{ steps.changes.outputs.vector-db-based }}
+      sql: ${{ steps.changes.outputs.sql }}
+
+
+      # # The Connector CI Tests is a status check emitted by airbyte-ci
+      # # We make it pass once we have determined that there are no changes to the connectors
+      # - name: "Skip Connectors CI tests"
+      #   if: steps.changes.outputs.src != 'true' && github.event_name == 'pull_request'
+      #   run: |
+      #     curl --request POST \
+      #     --url https://api.github.com/repos/${{ github.repository }}/statuses/${{ github.event.pull_request.head.sha }} \
+      #     --header 'authorization: Bearer ${{ secrets.GITHUB_TOKEN }}' \
+      #     --header 'content-type: application/json' \
+      #     --data '{
+      #     "state": "success",
+      #     "context": "CDK Changes - Connectors Tests",
+      #     "target_url": "${{ github.event.workflow_run.html_url }}"
+      #     }' \
+
+  connectors_ci:
+    needs: cdk_changes
+    # We only run the Connectors CI job if there are changes to the connectors on a non-forked PR
+    # Forked PRs are handled by the community_ci.yml workflow
+    # If the condition is not met the job will be skipped (it will not fail)
+    # runs-on: connector-test-large
+    runs-on: ubuntu-latest
+    timeout-minutes: 360 # 6 hours
+    strategy:
+      fail-fast: true # Save resources by aborting if one connector fails
+      matrix:
+        include:
+          - connector: source-shopify
+            cdk_extra: n/a
+          - connector: source-zendesk-support
+            cdk_extra: n/a
+          - connector: source-s3
+            cdk_extra: file-based
+          - connector: destination-pinecone
+            cdk_extra: vector-db-based
+          - connector: destination-motherduck
+            cdk_extra: sql
+    if: >
+      ( github.event_name == 'pull_request' && needs.cdk_changes.outputs.src == 'true' && github.event.pull_request.head.repo.fork != true
+      ) || github.event_name == 'workflow_dispatch'
+    name: "Check: '${{matrix.connector}}' (skip=${{needs.cdk_changes.outputs[matrix.cdk_extra] == 'false'}})"
+    steps:
+      - name: Abort if extra not changed (${{matrix.cdk_extra}})
+        id: no_changes
+        if: ${{ matrix.cdk_extra != 'n/a' && needs.cdk_changes.outputs[matrix.cdk_extra] == 'false' }}
+        run: |
+          echo "Aborting job as specified extra not changed: ${{matrix.cdk_extra}} = ${{ needs.cdk_changes.outputs[matrix.cdk_extra] }}"
+          echo "status=cancelled" >> "$GITHUB_OUTPUT"  # environment file replaces the deprecated ::set-output command
+          exit 1
+        continue-on-error: true # the deliberate failure above only marks the outcome that later steps test
+      # Get the monorepo so we can test the connectors
+      - name: Checkout Airbyte Monorepo
+        uses: actions/checkout@v4
+        if: steps.no_changes.outcome != 'failure'
+        with:
+          repository: airbytehq/airbyte
+          ref: master
+      - name: Fetch last commit id from remote branch [PULL REQUESTS]
+        if: github.event_name == 'pull_request' && steps.no_changes.outcome != 'failure'
+        id: fetch_last_commit_id_pr
+        run: echo "commit_id=$(git ls-remote --heads origin refs/heads/${{ github.head_ref }} | cut -f 1)" >> $GITHUB_OUTPUT
+      - name: Fetch last commit id from remote branch [WORKFLOW DISPATCH]
+        if: github.event_name == 'workflow_dispatch' && steps.no_changes.outcome != 'failure'
+        id: fetch_last_commit_id_wd # NOTE(review): no step with id `extract_branch` is visible in this job, so the ref on the next line presumably expands to empty - confirm the intended branch
+        run: echo "commit_id=$(git rev-parse origin/${{ steps.extract_branch.outputs.branch }})" >> $GITHUB_OUTPUT
+      - name: Test Connector
+        if: steps.no_changes.outcome != 'failure'
+        timeout-minutes: 90
+        env:
+          GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }}
+        run: | # one airbyte-ci invocation: every continued line ends in a backslash and all quotes are balanced
+          make tools.airbyte-ci-binary.install
+          airbyte-ci connectors \
+          --name ${{matrix.connector}} \
+          test \
+          --global-status-check-context='Connectors Test: ${{matrix.connector}}'
diff --git a/.github/workflows/fix-pr-command.yml b/.github/workflows/fix-pr-command.yml
new file mode 100644
index 00000000..6eebae8b
--- /dev/null
+++ b/.github/workflows/fix-pr-command.yml
@@ -0,0 +1,174 @@
+name: On-Demand PR Auto-Fix
+
+on:
+  workflow_dispatch:
+    inputs:
+      pr:
+        description: 'PR Number'
+        type: string
+        required: true
+      comment-id:
+        description: 'Comment ID (Optional)'
+        type: string
+        required: false
+
+env:
+  AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}
+
+jobs:
+  # This is copied from the `python_pytest.yml` file.
+  # Only the first two steps of the job are different, and they check out the PR's branch.
+  pr-fix-on-demand:
+    name: On-Demand PR Fix
+    # Runs only when dispatched (`workflow_dispatch` above); this job has no push/PR trigger and no fork filter.
+    strategy:
+      matrix:
+        python-version: [
+          '3.10',
+        ]
+        os: [
+          Ubuntu,
+        ]
+      fail-fast: false
+
+    runs-on: "${{ matrix.os }}-latest"
+    steps:
+
+      # Custom steps to fetch the PR and checkout the code:
+      - name: Checkout Airbyte
+        uses: actions/checkout@v4
+        with:
+          # Important that this is set so that CI checks are triggered again
+          # Without this we would be forever waiting on required checks to pass
+          token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }}
+
+      - name: Checkout PR (${{ github.event.inputs.pr }})
+        uses: dawidd6/action-checkout-pr@v1
+        with:
+          pr: ${{ github.event.inputs.pr }}
+
+      - name: Get PR info
+        id: pr-info
+        run: |
+          PR_JSON=$(gh api repos/${{ github.repository }}/pulls/${{ github.event.inputs.pr }})
+          echo "repo=$(echo "$PR_JSON" | jq -r .head.repo.full_name)" >> "$GITHUB_OUTPUT"  # replaces deprecated ::set-output
+          echo "branch=$(echo "$PR_JSON" | jq -r .head.ref)" >> "$GITHUB_OUTPUT"
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        shell: bash
+
+      - name: Create URL to the run output
+        id: vars
+        run: echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
+
+      - name: Append comment with job run link
+        id: first-comment-action
+        uses: peter-evans/create-or-update-comment@v4
+        with:
+          comment-id: ${{ github.event.inputs.comment-id }}
+          issue-number: ${{ github.event.inputs.pr }}
+          body: |
+            > **Auto-Fix Job Info**
+            >
+            > This job attempts to auto-fix any linting or formatting issues. If any fixes are made,
+            > those changes will be automatically committed and pushed back to the PR.
+            >
+            > Note: This job can only be run by maintainers. On PRs from forks, this command requires
+            > that the PR author has enabled the `Allow edits from maintainers` option.
+
+            > PR auto-fix job started... [Check job output.][1]
+
+            [1]: ${{ steps.vars.outputs.run-url }}
+
+      - name: Set up Poetry
+        uses: Gr1N/setup-poetry@v9
+        with:
+          poetry-version: "1.7.1"
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: 'poetry'
+      - name: Install dependencies
+        run: poetry install --all-extras
+
+      # Fix any lint or format issues
+
+      - name: Auto-Fix Ruff Lint Issues
+        run: poetry run ruff check --fix . || true
+      - name: Auto-Fix Ruff Format Issues
+        run: poetry run ruff format . || true
+
+      # Check for changes in git
+
+      - name: Check for changes
+        id: git-diff
+        run: |
+          git diff --quiet && echo "No changes to commit" || echo "changes=true" >> "$GITHUB_OUTPUT"
+        shell: bash
+
+      # Commit changes (if any)
+
+      - name: Commit changes
+        if: steps.git-diff.outputs.changes == 'true'
+        run: |
+          git config --global user.name "octavia-squidington-iii"
+          git config --global user.email "contact@airbyte.com"
+          git add .
+          git commit -m "Auto-fix lint and format issues"
+
+      # Fix any further 'unsafe' lint issues in a separate commit
+
+      - name: Auto-Fix Ruff Lint Issues (Unsafe)
+        run: poetry run ruff check --fix --unsafe-fixes . || true
+      - name: Auto-Fix Ruff Format Issues
+        run: poetry run ruff format . || true
+
+      # Check for changes in git (2nd time, for 'unsafe' lint fixes)
+
+      - name: Check for changes ('unsafe' fixes)
+        id: git-diff-2
+        run: |
+          git diff --quiet && echo "No changes to commit" || echo "changes=true" >> "$GITHUB_OUTPUT"
+        shell: bash
+
+      - name: Commit 'unsafe' lint fixes
+        if: steps.git-diff-2.outputs.changes == 'true'
+        run: |
+          git config --global user.name "octavia-squidington-iii"
+          git config --global user.email "contact@airbyte.com"
+          git add .
+          git commit -m "Auto-fix lint issues (unsafe)"
+
+      - name: Push changes to '(${{ steps.pr-info.outputs.repo }})'
+        if: steps.git-diff.outputs.changes == 'true' || steps.git-diff-2.outputs.changes == 'true'
+        run: |
+          git remote add contributor https://github.com/${{ steps.pr-info.outputs.repo }}.git
+          git push contributor HEAD:${{ steps.pr-info.outputs.branch }}
+
+      - name: Append success comment
+        uses: peter-evans/create-or-update-comment@v4
+        if: steps.git-diff.outputs.changes == 'true' || steps.git-diff-2.outputs.changes == 'true'
+        with:
+          comment-id: ${{ steps.first-comment-action.outputs.comment-id }}
+          reactions: hooray
+          body: |
+            > ✅ Changes applied successfully.
+
+      - name: Append success comment (no-op)
+        uses: peter-evans/create-or-update-comment@v4
+        if: steps.git-diff.outputs.changes != 'true' && steps.git-diff-2.outputs.changes != 'true'
+        with:
+          comment-id: ${{ steps.first-comment-action.outputs.comment-id }}
+          reactions: "+1"
+          body: |
+            > 🟦 Job completed successfully (no changes).
+
+      - name: Append failure comment
+        uses: peter-evans/create-or-update-comment@v4
+        if: failure()
+        with:
+          comment-id: ${{ steps.first-comment-action.outputs.comment-id }}
+          reactions: confused
+          body: |
+            > ❌ Job failed.
diff --git a/.github/workflows/poetry-lock-command.yml b/.github/workflows/poetry-lock-command.yml
new file mode 100644
index 00000000..a4a1145d
--- /dev/null
+++ b/.github/workflows/poetry-lock-command.yml
@@ -0,0 +1,144 @@
+name: On-Demand Poetry Lock
+
+on:
+  workflow_dispatch:
+    inputs:
+      pr:
+        description: 'PR Number'
+        type: string
+        required: true
+      comment-id:
+        description: 'Comment ID (Optional)'
+        type: string
+        required: false
+
+env:
+  AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}
+
+jobs:
+  poetry-lock-on-demand:
+    name: On-Demand Poetry Lock
+    strategy:
+      matrix:
+        python-version: [
+          '3.10',
+        ]
+        os: [
+          Ubuntu,
+        ]
+      fail-fast: false
+
+    runs-on: "${{ matrix.os }}-latest"
+    steps:
+
+      # Custom steps to fetch the PR and checkout the code:
+      - name: Checkout Airbyte
+        uses: actions/checkout@v4
+        with:
+          # Important that this is set so that CI checks are triggered again
+          # Without this we would be forever waiting on required checks to pass
+          token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }}
+
+      - name: Checkout PR (${{ github.event.inputs.pr }})
+        uses: dawidd6/action-checkout-pr@v1
+        with:
+          pr: ${{ github.event.inputs.pr }}
+
+      - name: Get PR info
+        id: pr-info
+        run: |
+          PR_JSON=$(gh api repos/${{ github.repository }}/pulls/${{ github.event.inputs.pr }})
+          echo "repo=$(echo "$PR_JSON" | jq -r .head.repo.full_name)" >> "$GITHUB_OUTPUT"  # replaces deprecated ::set-output
+          echo "branch=$(echo "$PR_JSON" | jq -r .head.ref)" >> "$GITHUB_OUTPUT"
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        shell: bash
+
+      - name: Create URL to the run output
+        id: vars
+        run: echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
+
+      - name: Append comment with job run link
+        id: first-comment-action
+        uses: peter-evans/create-or-update-comment@v4
+        with:
+          comment-id: ${{ github.event.inputs.comment-id }}
+          issue-number: ${{ github.event.inputs.pr }}
+          body: |
+            > **Poetry-Lock Job Info**
+            >
+            > This job attempts to re-lock dependencies using `poetry lock` command. If any changes
+            > are made, those changes will be automatically committed and pushed back to the PR.
+            >
+            > Note: This job can only be run by maintainers. On PRs from forks, this command requires
+            > that the PR author has enabled the `Allow edits from maintainers` option.
+            >
+            > `poetry lock` job started... [Check job output.][1]
+
+            [1]: ${{ steps.vars.outputs.run-url }}
+
+      - name: Set up Poetry
+        uses: Gr1N/setup-poetry@v9
+        with:
+          poetry-version: "1.7.1"
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: 'poetry'
+
+      # Run `poetry lock`
+
+      - name: Run `poetry lock`
+        run: poetry lock
+
+      # Check for changes in git
+
+      - name: Check for changes
+        id: git-diff
+        run: |
+          git diff --quiet && echo "No changes to commit" || echo "changes=true" >> "$GITHUB_OUTPUT"
+        shell: bash
+
+      # Commit changes (if any)
+
+      - name: Commit changes
+        if: steps.git-diff.outputs.changes == 'true'
+        run: |
+          git config --global user.name "octavia-squidington-iii"
+          git config --global user.email "contact@airbyte.com"
+          git add .
+          git commit -m 'Auto-commit `poetry lock` changes'  # single quotes keep the backticks literal instead of running them
+
+      - name: Push changes to '(${{ steps.pr-info.outputs.repo }})'
+        if: steps.git-diff.outputs.changes == 'true'
+        run: |
+          git remote add contributor https://github.com/${{ steps.pr-info.outputs.repo }}.git
+          git push contributor HEAD:${{ steps.pr-info.outputs.branch }}
+
+      - name: Append success comment
+        uses: peter-evans/create-or-update-comment@v4
+        if: steps.git-diff.outputs.changes == 'true'
+        with:
+          comment-id: ${{ steps.first-comment-action.outputs.comment-id }}
+          reactions: hooray
+          body: |
+            > ✅ `poetry lock` applied successfully.
+
+      - name: Append success comment (no-op)
+        uses: peter-evans/create-or-update-comment@v4
+        if: steps.git-diff.outputs.changes != 'true' # this workflow has a single `git-diff` step
+        with:
+          comment-id: ${{ steps.first-comment-action.outputs.comment-id }}
+          reactions: "+1"
+          body: |
+            > 🟦 Job completed successfully (no changes).
+
+      - name: Append failure comment
+        uses: peter-evans/create-or-update-comment@v4
+        if: failure()
+        with:
+          comment-id: ${{ steps.first-comment-action.outputs.comment-id }}
+          reactions: confused
+          body: |
+            > ❌ Job failed.
diff --git a/.github/workflows/pydoc_preview.yml b/.github/workflows/pydoc_preview.yml
new file mode 100644
index 00000000..3ce7e4d9
--- /dev/null
+++ b/.github/workflows/pydoc_preview.yml
@@ -0,0 +1,40 @@
+name: Generate Docs
+
+on:
+  push:
+    branches:
+      - main
+  pull_request: {}
+
+env:
+  AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}
+
+jobs:
+  preview_docs:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Set up Poetry
+        uses: Gr1N/setup-poetry@v9
+        with:
+          poetry-version: "1.7.1"
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+          cache: 'poetry'
+
+      - name: Install dependencies
+        run: poetry install --all-extras
+
+      - name: Generate documentation
+        run: |
+          poetry run poe docs-generate
+
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          # Upload only the generated docs directory
+          path: 'docs/generated'
diff --git a/.github/workflows/pydoc_publish.yml b/.github/workflows/pydoc_publish.yml
new file mode 100644
index 00000000..bd70e1c8
--- /dev/null
+++ b/.github/workflows/pydoc_publish.yml
@@ -0,0 +1,63 @@
+name: Publish Docs
+
+on:
+  push:
+    branches:
+      - main
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+env:
+  AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}
+
+# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
+# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
+concurrency:
+  group: "pages"
+  cancel-in-progress: false
+
+jobs:
+  publish_docs:
+    runs-on: ubuntu-latest
+    environment:
+      name: "github-pages"
+      url: ${{ steps.deployment.outputs.page_url }}
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Set up Poetry
+        uses: Gr1N/setup-poetry@v9
+        with:
+          poetry-version: "1.7.1"
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+          cache: 'poetry'
+      - name: Setup Pages
+        uses: actions/configure-pages@v5
+
+      - name: Install dependencies
+        run: poetry install --all-extras
+
+      - name: Generate documentation
+        run: |
+          poetry run poe docs-generate
+
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          # Upload only the generated docs directory
+          path: 'docs/generated'
+
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/.github/workflows/pypi_publish.yml b/.github/workflows/pypi_publish.yml
new file mode 100644
index 00000000..65d98d31
--- /dev/null
+++ b/.github/workflows/pypi_publish.yml
@@ -0,0 +1,46 @@
+name: Build and/or Publish
+
+on:
+  push:
+
+  workflow_dispatch:
+
+env:
+  AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - uses: hynek/build-and-inspect-python-package@v2
+
+  publish:
+    name: Publish to PyPI
+    runs-on: ubuntu-latest
+    needs: [build]
+    permissions:
+      id-token: write # IMPORTANT: this permission is mandatory for trusted publishing
+      contents: write # Needed to upload artifacts to the release
+    environment:
+      name: PyPi
+      url: https://pypi.org/p/airbyte # NOTE(review): confirm this is the PyPI project this repository publishes
+    if: startsWith(github.ref, 'refs/tags/')
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          name: Packages
+          path: dist
+      - name: Upload wheel to release
+        uses: svenstaro/upload-release-action@v2
+        with:
+          repo_token: ${{ secrets.GITHUB_TOKEN }}
+          file: dist/*.whl
+          tag: ${{ github.ref }}
+          overwrite: true
+          file_glob: true
+
+      - name: Publish
+        uses: pypa/gh-action-pypi-publish@v1.10.3
diff --git a/.github/workflows/python_lint.yml b/.github/workflows/python_lint.yml
new file mode 100644
index 00000000..5598076c
--- /dev/null
+++ b/.github/workflows/python_lint.yml
@@ -0,0 +1,88 @@
+name: Linters
+
+on:
+  push:
+    branches:
+      - main
+  pull_request: {}
+
+env:
+  AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}
+
+jobs:
+  ruff-lint-check:
+    name: Ruff Lint Check
+    runs-on: ubuntu-latest
+    steps:
+      # Common steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Set up Poetry
+        uses: Gr1N/setup-poetry@v9
+        with:
+          poetry-version: "1.7.1"
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+          cache: 'poetry'
+      - name: Install dependencies
+        run: poetry install --all-extras
+
+      # Job-specific step(s):
+      - name: Check code lint
+        run: poetry run ruff check .
+
+  ruff-format-check:
+    name: Ruff Format Check
+    runs-on: ubuntu-latest
+    steps:
+      # Common steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Set up Poetry
+        uses: Gr1N/setup-poetry@v9
+        with:
+          poetry-version: "1.7.1"
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+          cache: 'poetry'
+      - name: Install dependencies
+        run: poetry install --all-extras
+
+      # Job-specific step(s):
+      - name: Check code format
+        run: poetry run ruff format --check .
+
+  mypy-check:
+    name: MyPy Check
+    runs-on: ubuntu-latest
+    steps:
+      # Common steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Set up Poetry
+        uses: Gr1N/setup-poetry@v9
+        with:
+          poetry-version: "1.7.1"
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+          cache: 'poetry'
+      - name: Install dependencies
+        run: poetry install --all-extras
+
+      # Job-specific step(s):
+
+      # For now, we run mypy only on modified files (invoked through `poetry run`, like the ruff steps)
+      - name: Get changed Python files
+        id: changed-py-files
+        uses: tj-actions/changed-files@v43
+        with:
+          files: "airbyte_cdk/**/*.py"
+      - name: Run mypy on changed files
+        if: steps.changed-py-files.outputs.any_changed == 'true'
+        run: poetry run mypy ${{ steps.changed-py-files.outputs.all_changed_files }} --config-file mypy.ini --install-types --non-interactive
diff --git a/.github/workflows/python_pytest.yml b/.github/workflows/python_pytest.yml
new file mode 100644
index 00000000..1eaac864
--- /dev/null
+++ b/.github/workflows/python_pytest.yml
@@ -0,0 +1,140 @@
+# This workflow will run pytest.
+#
+# There are two job sets which run in parallel:
+# 1. `pytest-fast`: Run fast tests only, and fail fast so the dev knows asap if they broke something.
+# 2. `pytest`: Run all tests, across multiple Python versions.
+#
+# Note that `pytest-fast` also skips tests that require credentials, allowing it to run on forks.
+name: PyTest
+
+on:
+  push:
+    branches:
+      - main
+  pull_request: {}
+
+env:
+  AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}
+
+jobs:
+  pytest-fast:
+    name: Pytest (Fast)
+    runs-on: ubuntu-latest
+    steps:
+      # Common steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Set up Poetry
+        uses: Gr1N/setup-poetry@v9
+        with:
+          poetry-version: "1.7.1"
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+          cache: 'poetry'
+      - name: Install dependencies
+        run: poetry install --all-extras
+
+      - name: Run Pytest with Coverage (Fast Tests Only)
+        timeout-minutes: 60
+        env:
+          GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }}
+        run: >
+          poetry run coverage run -m pytest
+          --durations=5 --exitfirst
+          -m "not slow and not requires_creds and not linting and not flaky"
+
+      - name: Run Pytest with Coverage (Flaky Tests Only) # --append adds to the coverage data of the previous step instead of replacing it
+        timeout-minutes: 60
+        continue-on-error: true
+        env:
+          GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }}
+        run: >
+          poetry run coverage run --append -m pytest
+          --durations=5 --exitfirst
+          -m "flaky and not slow and not requires_creds"
+
+      - name: Print Coverage Report
+        if: always()
+        run: poetry run coverage report
+
+      - name: Create Coverage Artifacts
+        if: always()
+        run: |
+          poetry run coverage html -d htmlcov
+          poetry run coverage xml -o htmlcov/coverage.xml
+
+      - name: Upload coverage to GitHub Artifacts
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: fasttest-coverage
+          path: htmlcov/
+
+  pytest:
+    name: Pytest (All, Python ${{ matrix.python-version }}, ${{ matrix.os }})
+    # Don't run on forks. Run on pushes to main, and on PRs that are not from forks.
+    if: >
+      (github.event_name == 'push' && github.ref == 'refs/heads/main') ||
+      (github.event.pull_request.head.repo.fork == false)
+    strategy:
+      matrix:
+        python-version: [
+          '3.10',
+          '3.11',
+          #'3.12', # Currently blocked by Pendulum
+        ]
+        os: [
+          Ubuntu,
+          # Windows, # For now, we don't include Windows in the test matrix.
+        ]
+      fail-fast: false
+
+    runs-on: "${{ matrix.os }}-latest"
+    env:
+      # Enforce UTF-8 encoding so Windows runners don't fail inside the connector code.
+      # TODO: See if we can fully enforce this within PyAirbyte itself.
+      PYTHONIOENCODING: utf-8
+    steps:
+      # Common steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Set up Poetry
+        uses: Gr1N/setup-poetry@v9
+        with:
+          poetry-version: "1.7.1"
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: 'poetry'
+      - name: Install dependencies
+        run: poetry install --all-extras
+
+      # Job-specific step(s):
+      - name: Run Pytest
+        timeout-minutes: 60
+        env:
+          GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }}
+        run: >
+          poetry run coverage run -m pytest
+          --durations=10
+          -m "not linting and not super_slow and not flaky"
+
+      - name: Print Coverage Report
+        if: always()
+        run: poetry run coverage report
+
+      - name: Create Coverage Artifacts
+        if: always()
+        run: |
+          poetry run coverage html -d htmlcov
+          poetry run coverage xml -o htmlcov/coverage.xml
+
+      - name: Upload coverage to GitHub Artifacts
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: py${{ matrix.python-version }}-${{ matrix.os }}-test-coverage
+          path: htmlcov/
diff --git a/.github/workflows/release_drafter.yml b/.github/workflows/release_drafter.yml
new file mode 100644
index 00000000..f900002c
--- /dev/null
+++ b/.github/workflows/release_drafter.yml
@@ -0,0 +1,29 @@
+name: Release Drafter
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    # Only following types are handled by the action, but one can default to all as well
+    types: [opened, reopened, synchronize]
+  # # pull_request_target event is required for autolabeler to support PRs from forks
+  # pull_request_target:
+  #   types: [opened, reopened, synchronize]
+
+env:
+  AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}
+
+jobs:
+  update_release_draft:
+    permissions:
+      contents: write
+      pull-requests: write
+    runs-on: ubuntu-latest
+    steps:
+      # Drafts the next Release notes as Pull Requests are merged into "main"
+      - uses: release-drafter/release-drafter@v6
+        with:
+          config-name: release-drafter.yml
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/semantic_pr_check.yml b/.github/workflows/semantic_pr_check.yml
new file mode 100644
index 00000000..3c0947d8
--- /dev/null
+++ b/.github/workflows/semantic_pr_check.yml
@@ -0,0 +1,54 @@
+name: "Verify Semantic PR Title"
+
+on:
+  pull_request:
+    types:
+      - opened
+      - edited
+      - synchronize
+      - ready_for_review
+
+permissions:
+  pull-requests: read
+
+jobs:
+  validate_pr_title:
+    name: Validate PR title
+    runs-on: ubuntu-latest
+    steps:
+      - uses: amannn/action-semantic-pull-request@v5
+        if: ${{ github.event.pull_request.draft == false }}
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          # Configure which types are allowed (newline-delimited).
+          # See: https://github.com/commitizen/conventional-commit-types/blob/master/index.json
+          types: |
+            fix
+            Fix
+            feat
+            Feat
+            docs
+            Docs
+            ci
+            CI
+            chore
+            Chore
+            build
+            Build
+            test
+            Test
+
+          # # We don't use scopes as of now
+          # scopes: |
+          #   core
+          #   ui
+          #   JIRA-\d+
+
+          # Require capitalization for the first letter of the subject.
+          subjectPattern: ^[A-Z].*$
+          # The variables `subject` and `title` can be used within the message.
+          subjectPatternError: |
+            The subject "{subject}" found in the pull request title "{title}"
+            didn't match the configured pattern. Please check the title against
+            the naming rules. You can also use the [WIP] prefix to bypass this check.
diff --git a/.github/workflows/slash_command_dispatch.yml b/.github/workflows/slash_command_dispatch.yml
new file mode 100644
index 00000000..25c92ad5
--- /dev/null
+++ b/.github/workflows/slash_command_dispatch.yml
@@ -0,0 +1,42 @@
+name: Slash Command Dispatch
+
+on:
+  issue_comment:
+    types: [created]
+
+env:
+  AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}
+
+jobs:
+  slashCommandDispatch:
+    # Only allow slash commands on pull request (not on issues)
+    if: ${{ github.event.issue.pull_request }}
+    runs-on: ubuntu-latest
+    steps:
+
+      - name: Slash Command Dispatch
+        id: dispatch
+        uses: peter-evans/slash-command-dispatch@v4
+        with:
+          repository: ${{ github.repository }}
+          token: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
+          dispatch-type: workflow
+          issue-type: pull-request
+          commands: |
+            fix-pr
+            test-pr
+            poetry-lock
+          static-args: |
+            pr=${{ github.event.issue.number }}
+            comment-id=${{ github.event.comment.id }}
+
+          # Only run for users with 'write' permission on the main repository
+          permission: write
+
+      - name: Edit comment with error message
+        if: steps.dispatch.outputs.error-message
+        uses: peter-evans/create-or-update-comment@v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          body: |
+            > Error: ${{ steps.dispatch.outputs.error-message }}
diff --git a/.github/workflows/test-pr-command.yml b/.github/workflows/test-pr-command.yml
new file mode 100644
index 00000000..2636f46d
--- /dev/null
+++ b/.github/workflows/test-pr-command.yml
@@ -0,0 +1,163 @@
+name: On-Demand PR Test
+
+on:
+  workflow_dispatch:
+    inputs:
+      pr:
+        description: 'PR Number'
+        type: string
+        required: true
+      comment-id:
+        description: 'Comment ID (Optional)'
+        type: string
+        required: false
+
+env:
+  AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}
+
+jobs:
+  start-workflow:
+    name: Append 'Starting' Comment
+    runs-on: ubuntu-latest
+    steps:
+      - name: Get PR JSON
+        id: pr-info
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          PR_JSON=$(gh api repos/${{ github.repository }}/pulls/${{ github.event.inputs.pr }})
+          echo "$PR_JSON" > pr-info.json
+          echo "sha=$(cat pr-info.json | jq -r .head.sha)" >> $GITHUB_OUTPUT
+          echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
+      - name: Upload PR details as artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: pr-info
+          path: pr-info.json
+      - name: Append comment with job run link
+        id: first-comment-action
+        uses: peter-evans/create-or-update-comment@v4
+        with:
+          comment-id: ${{ github.event.inputs.comment-id }}
+          issue-number: ${{ github.event.inputs.pr }}
+          body: |
+
+            > PR test job started... [Check job output.][1]
+
+            [1]: ${{ steps.pr-info.outputs.run-url }}
+
+  # This is copied from the `python_pytest.yml` file.
+  # Only the first two steps of the job are different, and they check out the PR's branch.
+  pytest-on-demand:
+    name: On-Demand PR Pytest (All, Python ${{ matrix.python-version }}, ${{ matrix.os }})
+    needs: [start-workflow]
+    strategy:
+      matrix:
+        python-version: [
+          '3.10',
+          '3.11',
+        ]
+        os: [
+          Ubuntu,
+          # Windows, # For now, we don't include Windows in the test matrix.
+        ]
+      fail-fast: false
+    runs-on: "${{ matrix.os }}-latest"
+    env:
+      # Enforce UTF-8 encoding so Windows runners don't fail inside the connector code.
+      # TODO: See if we can fully enforce this within PyAirbyte itself.
+      PYTHONIOENCODING: utf-8
+    steps:
+
+      # Custom steps to fetch the PR and checkout the code:
+
+      - name: Download PR info
+        # This puts the `pr-info.json` file in the current directory.
+        # We need this to get the PR's SHA at the time of the workflow run.
+ uses: actions/download-artifact@v4 + with: + name: pr-info + + - name: Checkout PR + uses: actions/checkout@v4 + with: + token: ${{ secrets.GITHUB_TOKEN }} + - name: Checkout PR (${{ github.event.inputs.pr }}) + uses: dawidd6/action-checkout-pr@v1 + with: + pr: ${{ github.event.inputs.pr }} + + # Same as the `python_pytest.yml` file: + + - name: Set up Poetry + uses: Gr1N/setup-poetry@v9 + with: + poetry-version: "1.7.1" + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'poetry' + - name: Install dependencies + run: poetry install --all-extras + + - name: Run Pytest + timeout-minutes: 60 + env: + GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }} + run: > + poetry run pytest + --verbose + -m "not super_slow and not flaky" + + - name: Run Pytest (Flaky Only) + continue-on-error: true + timeout-minutes: 60 + env: + GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }} + run: > + poetry run pytest + --verbose + -m "flaky and not super_slow" + + - name: Post CI Success to GitHub + run: | + curl --request POST \ + --url https://api.github.com/repos/${{ github.repository }}/statuses/$(cat pr-info.json | jq -r .head.sha) \ + --header 'authorization: Bearer ${{ secrets.GITHUB_TOKEN }}' \ + --header 'content-type: application/json' \ + --data '{ + "state": "success", + "context": "Pytest (All, Python ${{ matrix.python-version }}, ${{ matrix.os }})", + "target_url": "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}", + }' \ + + log-success-comment: + name: Append 'Success' Comment + needs: [pytest-on-demand] + runs-on: ubuntu-latest + steps: + - name: Append success comment + uses: peter-evans/create-or-update-comment@v4 + with: + issue-number: ${{ github.event.inputs.pr }} + comment-id: ${{ github.event.inputs.comment-id }} + reactions: hooray + body: | + > ✅ Tests passed. 
+
+  log-failure-comment:
+    name: Append 'Failure' Comment
+    # Runs only when `pytest-on-demand` fails; `always()` keeps this job from being skipped after that failure.
+    needs: [pytest-on-demand, start-workflow]
+    if: always() && needs.pytest-on-demand.result == 'failure'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Append failure comment
+        uses: peter-evans/create-or-update-comment@v4
+        with:
+          issue-number: ${{ github.event.inputs.pr }}
+          comment-id: ${{ github.event.inputs.comment-id }}
+          reactions: confused
+          body: |
+            > ❌ Tests failed.
diff --git a/poetry.lock b/poetry.lock
index c68a760f..aec7fe5f 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -4187,6 +4187,33 @@ typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.1
 [package.extras]
 jupyter = ["ipywidgets (>=7.5.1,<9)"]
 
+[[package]]
+name = "ruff"
+version = "0.7.3"
+description = "An extremely fast Python linter and code formatter, written in Rust."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "ruff-0.7.3-py3-none-linux_armv6l.whl", hash = "sha256:34f2339dc22687ec7e7002792d1f50712bf84a13d5152e75712ac08be565d344"},
+    {file = "ruff-0.7.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:fb397332a1879b9764a3455a0bb1087bda876c2db8aca3a3cbb67b3dbce8cda0"},
+    {file = "ruff-0.7.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:37d0b619546103274e7f62643d14e1adcbccb242efda4e4bdb9544d7764782e9"},
+    {file = "ruff-0.7.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d59f0c3ee4d1a6787614e7135b72e21024875266101142a09a61439cb6e38a5"},
+    {file = "ruff-0.7.3-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:44eb93c2499a169d49fafd07bc62ac89b1bc800b197e50ff4633aed212569299"},
+    {file = "ruff-0.7.3-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6d0242ce53f3a576c35ee32d907475a8d569944c0407f91d207c8af5be5dae4e"},
+    {file = "ruff-0.7.3-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:6b6224af8b5e09772c2ecb8dc9f3f344c1aa48201c7f07e7315367f6dd90ac29"},
+ {file = "ruff-0.7.3-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c50f95a82b94421c964fae4c27c0242890a20fe67d203d127e84fbb8013855f5"}, + {file = "ruff-0.7.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7f3eff9961b5d2644bcf1616c606e93baa2d6b349e8aa8b035f654df252c8c67"}, + {file = "ruff-0.7.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8963cab06d130c4df2fd52c84e9f10d297826d2e8169ae0c798b6221be1d1d2"}, + {file = "ruff-0.7.3-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:61b46049d6edc0e4317fb14b33bd693245281a3007288b68a3f5b74a22a0746d"}, + {file = "ruff-0.7.3-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:10ebce7696afe4644e8c1a23b3cf8c0f2193a310c18387c06e583ae9ef284de2"}, + {file = "ruff-0.7.3-py3-none-musllinux_1_2_i686.whl", hash = "sha256:3f36d56326b3aef8eeee150b700e519880d1aab92f471eefdef656fd57492aa2"}, + {file = "ruff-0.7.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5d024301109a0007b78d57ab0ba190087b43dce852e552734ebf0b0b85e4fb16"}, + {file = "ruff-0.7.3-py3-none-win32.whl", hash = "sha256:4ba81a5f0c5478aa61674c5a2194de8b02652f17addf8dfc40c8937e6e7d79fc"}, + {file = "ruff-0.7.3-py3-none-win_amd64.whl", hash = "sha256:588a9ff2fecf01025ed065fe28809cd5a53b43505f48b69a1ac7707b1b7e4088"}, + {file = "ruff-0.7.3-py3-none-win_arm64.whl", hash = "sha256:1713e2c5545863cdbfe2cbce21f69ffaf37b813bfd1fb3b90dc9a6f1963f5a8c"}, + {file = "ruff-0.7.3.tar.gz", hash = "sha256:e1d1ba2e40b6e71a61b063354d04be669ab0d39c352461f3d789cac68b54a313"}, +] + [[package]] name = "scikit-learn" version = "1.5.2" @@ -5220,4 +5247,3 @@ vector-db-based = ["cohere", "langchain", "openai", "tiktoken"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "221934ee1237b058139798d465b10d05eb0e22581fbbcfde19de862e8d120261" diff --git a/pyproject.toml b/pyproject.toml index ce0de1de..bb4a02e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,6 +81,8 @@ xmltodict = "^0.13.0" 
freezegun = "*"
 mypy = "*"
 asyncio = "3.4.3"
+ruff = "^0.7.2"
+pdoc = "^15.0.0"
 poethepoet = "^0.24.2"
 pyproject-flake8 = "^6.1.0"
 pytest = "^7"
@@ -89,7 +91,6 @@ pytest-cov = "*"
 pytest-httpserver = "*"
 pytest-mock = "*"
 requests-mock = "*"
-pdoc = "^15.0.0"
 
 [tool.poetry.extras]
 file-based = ["avro", "fastavro", "pyarrow", "unstructured", "pdf2image", "pdfminer.six", "unstructured.pytesseract", "pytesseract", "markdown", "python-calamine", "python-snappy"]
@@ -109,12 +110,43 @@ assemble = {cmd = "bin/generate-component-manifest-dagger.sh", help = "Generate
 build-package = {cmd = "poetry build", help = "Build the python package: source and wheels archives."}
 build = {sequence = ["assemble", "build-package"], help = "Run all tasks to build the package."}
 
-# Check tasks
-lint = {cmd = "pflake8 --config ../../pyproject.toml ./", help = "Lint with flake8."}
+# Format check tasks (read-only: report formatting problems without rewriting files)
+_format-check-ruff = {cmd = "ruff format --check .", help = "Check formatting with Ruff."}
+_format-check-prettier = {cmd = "npx prettier . --check", help = "Check formatting with prettier."}
+format-check = {sequence = ["_format-check-ruff", "_format-check-prettier"], help = "Check formatting for all file types.", ignore_fail = "return_non_zero"}
+
+# Format fix tasks (rewrite files in place)
+_format-fix-ruff = {cmd = "ruff format .", help = "Format with Ruff."}
+_format-fix-prettier = {cmd = "npx prettier . --write", help = "Format with prettier."}
+format-fix = {sequence = ["_format-fix-ruff", "_format-fix-prettier"], help = "Format all file types.", ignore_fail = "return_non_zero"}
+
+# Linting/Typing check tasks (`lint` runs Ruff and then `type-check`)
+_lint-ruff = {cmd = "poetry run ruff check .", help = "Lint with Ruff."}
 type-check = {cmd = "bin/run-mypy-on-modified-files.sh", help = "Type check modified files with mypy."}
-unit-test-with-cov = {cmd = "pytest -s unit_tests -c pytest.ini --cov=airbyte_cdk --cov-report=term --cov-config ../../pyproject.toml", help = "Run unit tests and create a coverage report."}
-# TODO: find a version of the modified mypy check that works both locally and in CI.
+lint = {sequence = ["_lint-ruff", "type-check"], help = "Lint all code. Includes type checking.", ignore_fail = "return_non_zero"}
+
+# Lockfile check task
 check-lockfile = {cmd = "poetry check", help = "Check the poetry lock file."}
+
+# Linting/Typing fix tasks
+lint-fix = { cmd = "poetry run ruff check --fix .", help = "Lint-fix modified files, applying only safe fixes." }
+lint-fix-unsafe = { cmd = "poetry run ruff check --fix --unsafe-fixes .", help = "Lint-fix modified files, including 'unsafe' fixes. It is recommended to first commit any pending changes and then always manually review any unsafe changes applied." }
+
+# Combined Check and Fix tasks (`type-check` is not listed in `check-all` because `lint` already runs it)
+
+check-all = {sequence = ["lint", "format-check", "check-lockfile"], help = "Lint, format, and type-check modified files.", ignore_fail = "return_non_zero"}
+fix-all = {sequence = ["format-fix", "lint-fix"], help = "Lint-fix and format-fix modified files, ignoring unsafe fixes.", ignore_fail = "return_non_zero"}
+fix-and-check = {sequence = ["fix-all", "check-all"], help = "Lint-fix and format-fix, then re-check to see if any issues remain.", ignore_fail = "return_non_zero"}
+
+# PyTest tasks
+
+pytest = {cmd = "poetry run coverage run -m pytest --durations=10", help = "Run all pytest tests."}
+pytest-fast = {cmd = "poetry run coverage run -m pytest --durations=5 --exitfirst -m 'not flaky and not slow and not requires_creds'", help = "Run pytest tests, failing fast and excluding slow tests."}
+unit-test-with-cov = {cmd = "pytest -s unit_tests --cov=airbyte_cdk --cov-report=term --cov-config ./pyproject.toml", help = "Run unit tests and create a coverage report."}
+
+# Combined check tasks (other)
+
+# TODO: find a version of the modified mypy check that works both locally and in CI.
 check-local = {sequence = ["lint", "type-check", "check-lockfile", "unit-test-with-cov"], help = "Lint all code, type-check modified files, and run unit tests."}
 check-ci = {sequence = ["check-lockfile", "build", "lint", "unit-test-with-cov"], help = "Build the package, lint and run unit tests. Does not include type-checking."}
 
@@ -125,6 +157,21 @@ pre-push = {sequence = ["build", "check-local"], help = "Run all build and check
 docs-generate = {env = {PDOC_ALLOW_EXEC = "1"}, cmd = "python -m docs.generate run"}
 docs-preview = {shell = "poe docs-generate && open docs/generated/index.html"}
 
+[tool.check-wheel-contents]
+# Quality control for Python wheel generation. Docs here:
+# - https://github.com/jwodder/check-wheel-contents
+ignore = [
+    "W002"  # Duplicate files. (TODO: Fix the few duplicate files, mostly `__init__.py` files that have only copyright text.)
+]
+
+[tool.pytest.ini_options]
+log_cli = true
+log_cli_level = "INFO"
+log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)"
+log_cli_date_format = "%Y-%m-%d %H:%M:%S"
+filterwarnings = [
+    "ignore::airbyte_cdk.sources.source.ExperimentalClassWarning"
+]
 [tool.airbyte_ci]
 python_versions = ["3.10", "3.11"]