From 3375e65486d8555d837207b1b47f557e040bf1ba Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Mon, 16 Dec 2024 17:50:15 -0800 Subject: [PATCH] feat(gha): various docker / docker-compose build improvements (#31386) --- .dockerignore | 1 + .github/actions/setup-docker/action.yml | 69 +++++++++++++++++++++++++ .github/workflows/docker.yml | 42 +++++++++------ .github/workflows/ephemeral-env.yml | 26 ++++------ .github/workflows/tag-release.yml | 20 +++---- Dockerfile | 59 +++++++++++++-------- docker-compose.yml | 6 ++- docker/docker-bootstrap.sh | 6 ++- docker/docker-frontend.sh | 2 +- docs/docs/contributing/development.mdx | 13 +++++ requirements/development.in | 2 +- requirements/development.txt | 6 +-- superset-frontend/webpack.config.js | 3 +- superset/__init__.py | 3 +- 14 files changed, 180 insertions(+), 78 deletions(-) create mode 100644 .github/actions/setup-docker/action.yml diff --git a/.dockerignore b/.dockerignore index 33b76412b60cb..31c873f0073f9 100644 --- a/.dockerignore +++ b/.dockerignore @@ -42,6 +42,7 @@ docs/ install/ superset-frontend/cypress-base/ superset-frontend/coverage/ +superset-frontend/.temp_cache/ superset/static/assets/ superset-websocket/dist/ venv diff --git a/.github/actions/setup-docker/action.yml b/.github/actions/setup-docker/action.yml new file mode 100644 index 0000000000000..71a559829f673 --- /dev/null +++ b/.github/actions/setup-docker/action.yml @@ -0,0 +1,69 @@ +name: "Setup Docker Environment" +description: "Reusable steps for setting up QEMU, Docker Buildx, DockerHub login, and optionally Docker Compose" +inputs: + build: + description: "Whether to set up QEMU and Docker Buildx for building images" 
+ required: false + default: "false" + dockerhub-user: + description: "DockerHub username" + required: false + dockerhub-token: + description: "DockerHub token" + required: false + install-docker-compose: + description: "Flag to install Docker Compose" + required: false + default: "true" + login-to-dockerhub: + description: "Whether you want to log into dockerhub" + required: false + default: "true" +outputs: {} +runs: + using: "composite" + steps: + + - name: Set up QEMU + if: ${{ inputs.build == 'true' }} + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + if: ${{ inputs.build == 'true' }} + uses: docker/setup-buildx-action@v3 + + - name: Try to login to DockerHub + if: ${{ inputs.login-to-dockerhub == 'true' }} + continue-on-error: true + uses: docker/login-action@v3 + with: + username: ${{ inputs.dockerhub-user }} + password: ${{ inputs.dockerhub-token }} + + - name: Install Docker Compose + if: ${{ inputs.install-docker-compose == 'true' }} + shell: bash + run: | + sudo apt-get update + sudo apt-get install -y ca-certificates curl + sudo install -m 0755 -d /etc/apt/keyrings + + # Download and save the Docker GPG key in the correct format + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg + + # Ensure the key file is readable + sudo chmod a+r /etc/apt/keyrings/docker.gpg + + # Add the Docker repository using the correct key + echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ + $(. 
/etc/os-release && echo "$VERSION_CODENAME") stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + + # Update package lists and install Docker Compose plugin + sudo apt update + sudo apt install -y docker-compose-plugin + + - name: Docker Version Info + shell: bash + run: docker info diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 287026c084412..54893a137aff0 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -15,20 +15,20 @@ concurrency: jobs: setup_matrix: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 outputs: matrix_config: ${{ steps.set_matrix.outputs.matrix_config }} steps: - id: set_matrix run: | - MATRIX_CONFIG=$(if [ "${{ github.event_name }}" == "pull_request" ]; then echo '["dev"]'; else echo '["dev", "lean", "py310", "websocket", "dockerize", "py311"]'; fi) + MATRIX_CONFIG=$(if [ "${{ github.event_name }}" == "pull_request" ]; then echo '["dev", "lean"]'; else echo '["dev", "lean", "py310", "websocket", "dockerize", "py311"]'; fi) echo "matrix_config=${MATRIX_CONFIG}" >> $GITHUB_OUTPUT echo $GITHUB_OUTPUT docker-build: name: docker-build needs: setup_matrix - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 strategy: matrix: build_preset: ${{fromJson(needs.setup_matrix.outputs.matrix_config)}} @@ -50,21 +50,13 @@ jobs: with: token: ${{ secrets.GITHUB_TOKEN }} - - name: Set up QEMU + - name: Setup Docker Environment if: steps.check.outputs.python || steps.check.outputs.frontend || steps.check.outputs.docker - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - if: steps.check.outputs.python || steps.check.outputs.frontend || steps.check.outputs.docker - uses: docker/setup-buildx-action@v3 - - - name: Try to login to DockerHub - if: steps.check.outputs.python || steps.check.outputs.frontend || steps.check.outputs.docker - continue-on-error: true - uses: docker/login-action@v3 + uses: ./.github/actions/setup-docker with: - username: ${{ secrets.DOCKERHUB_USER }} - 
password: ${{ secrets.DOCKERHUB_TOKEN }} + dockerhub-user: ${{ secrets.DOCKERHUB_USER }} + dockerhub-token: ${{ secrets.DOCKERHUB_TOKEN }} + build: "true" - name: Setup supersetbot if: steps.check.outputs.python || steps.check.outputs.frontend || steps.check.outputs.docker @@ -87,4 +79,22 @@ jobs: --preset ${{ matrix.build_preset }} \ --context "$EVENT" \ --context-ref "$RELEASE" $FORCE_LATEST \ + --extra-flags "--build-arg INCLUDE_CHROMIUM=false" \ $PLATFORM_ARG + + - name: Print docker stats + if: steps.check.outputs.python || steps.check.outputs.frontend || steps.check.outputs.docker + run: | + IMAGE_ID=$(docker images --filter "label=sha=${{ github.sha }}" --format "{{.ID}}" | head -n 1) + echo "SHA: ${{ github.sha }}" + echo "IMAGE: $IMAGE_ID" + docker images $IMAGE_ID + docker history $IMAGE_ID + + - name: docker-compose sanity check + if: (steps.check.outputs.python || steps.check.outputs.frontend || steps.check.outputs.docker) && (matrix.build_preset == 'dev' || matrix.build_preset == 'lean') + shell: bash + run: | + export SUPERSET_BUILD_TARGET=${{ matrix.build_preset }} + docker compose build superset-init --build-arg DEV_MODE=false --build-arg INCLUDE_CHROMIUM=false + docker compose up superset-init --exit-code-from superset-init diff --git a/.github/workflows/ephemeral-env.yml b/.github/workflows/ephemeral-env.yml index acf3b0cc72124..ba42315327d65 100644 --- a/.github/workflows/ephemeral-env.yml +++ b/.github/workflows/ephemeral-env.yml @@ -27,6 +27,9 @@ jobs: outputs: slash-command: ${{ steps.eval-body.outputs.result }} feature-flags: ${{ steps.eval-feature-flags.outputs.result }} + env: + DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USER }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} steps: - name: Debug @@ -139,29 +142,20 @@ jobs: ref: ${{ steps.get-sha.outputs.sha }} persist-credentials: false - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + - name: Setup Docker 
Environment + uses: ./.github/actions/setup-docker + with: + dockerhub-user: ${{ secrets.DOCKERHUB_USER }} + dockerhub-token: ${{ secrets.DOCKERHUB_TOKEN }} + build: "true" + install-docker-compose: "false" - name: Setup supersetbot uses: ./.github/actions/setup-supersetbot/ - - name: Try to login to DockerHub - if: steps.check.outputs.python || steps.check.outputs.frontend || steps.check.outputs.docker - continue-on-error: true - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USER }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Build ephemeral env image env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - username: ${{ secrets.DOCKERHUB_USER }} - password: ${{ secrets.DOCKERHUB_TOKEN }} run: | supersetbot docker \ --preset ci \ diff --git a/.github/workflows/tag-release.yml b/.github/workflows/tag-release.yml index ec06bc8e1cf63..ce7e35b48e73e 100644 --- a/.github/workflows/tag-release.yml +++ b/.github/workflows/tag-release.yml @@ -45,17 +45,20 @@ jobs: build_preset: ["dev", "lean", "py310", "websocket", "dockerize", "py311"] fail-fast: false steps: - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: fetch-depth: 0 + - name: Setup Docker Environment + uses: ./.github/actions/setup-docker + with: + dockerhub-user: ${{ secrets.DOCKERHUB_USER }} + dockerhub-token: ${{ secrets.DOCKERHUB_TOKEN }} + install-docker-compose: "false" + build: "true" + - name: Use Node.js 20 uses: actions/setup-node@v4 with: @@ -64,13 +67,6 @@ jobs: - name: Setup supersetbot uses: ./.github/actions/setup-supersetbot/ - - name: Try to login to DockerHub - continue-on-error: true - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USER }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Execute custom Node.js script env: DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USER }} 
diff --git a/Dockerfile b/Dockerfile index 4ee30930898d3..ea2586e0acc51 100644 --- a/Dockerfile +++ b/Dockerfile @@ -52,12 +52,11 @@ WORKDIR /app/superset-frontend RUN mkdir -p /app/superset/static/assets \ /app/superset/translations -# Copy translation files -COPY superset/translations /app/superset/translations - # Mount package files and install dependencies if not in dev mode RUN --mount=type=bind,source=./superset-frontend/package.json,target=./package.json \ --mount=type=bind,source=./superset-frontend/package-lock.json,target=./package-lock.json \ + --mount=type=cache,target=/root/.cache \ + --mount=type=cache,target=/root/.npm \ if [ "$DEV_MODE" = "false" ]; then \ npm ci; \ else \ @@ -68,16 +67,24 @@ RUN --mount=type=bind,source=./superset-frontend/package.json,target=./package.j COPY superset-frontend /app/superset-frontend # Build the frontend if not in dev mode -RUN if [ "$DEV_MODE" = "false" ]; then \ +RUN --mount=type=cache,target=/app/superset-frontend/.temp_cache \ + --mount=type=cache,target=/root/.npm \ + if [ "$DEV_MODE" = "false" ]; then \ echo "Running 'npm run ${BUILD_CMD}'"; \ - if [ "$BUILD_TRANSLATIONS" = "true" ]; then \ - npm run build-translation; \ - fi; \ npm run ${BUILD_CMD}; \ else \ echo "Skipping 'npm run ${BUILD_CMD}' in dev mode"; \ - fi && \ - rm -rf /app/superset/translations/*/*/*.po + fi; + +# Copy translation files +COPY superset/translations /app/superset/translations + +# Build the translations if BUILD_TRANSLATIONS is "true", then remove the .po/.mo files +RUN if [ "$BUILD_TRANSLATIONS" = "true" ]; then \ + npm run build-translation; \ + fi; \ + rm -rf /app/superset/translations/*/*/*.po; \ + rm -rf /app/superset/translations/*/*/*.mo; ###################################################################### @@ -103,7 +110,7 @@ RUN useradd --user-group -d ${SUPERSET_HOME} -m --no-log-init --shell /bin/bash # Some bash scripts needed throughout the layers COPY --chmod=755 docker/*.sh /app/docker/ -RUN pip install --no-cache-dir --upgrade setuptools pip uv +RUN pip install 
--no-cache-dir --upgrade uv # Using uv as it's faster/simpler than pip RUN uv venv /app/.venv @@ -112,9 +119,9 @@ ENV PATH="/app/.venv/bin:${PATH}" # Install Playwright and optionally setup headless browsers ARG INCLUDE_CHROMIUM="true" ARG INCLUDE_FIREFOX="false" -RUN --mount=type=cache,target=/root/.cache/pip \ +RUN --mount=type=cache,target=/root/.cache/uv\ if [ "$INCLUDE_CHROMIUM" = "true" ] || [ "$INCLUDE_FIREFOX" = "true" ]; then \ - pip install playwright && \ + uv pip install playwright && \ playwright install-deps && \ if [ "$INCLUDE_CHROMIUM" = "true" ]; then playwright install chromium; fi && \ if [ "$INCLUDE_FIREFOX" = "true" ]; then playwright install firefox; fi; \ @@ -129,12 +136,15 @@ FROM python-base AS python-translation-compiler # Install Python dependencies using docker/pip-install.sh COPY requirements/translations.txt requirements/ -RUN --mount=type=cache,target=/root/.cache/pip \ +RUN --mount=type=cache,target=/root/.cache/uv \ /app/docker/pip-install.sh -r requirements/translations.txt COPY superset/translations/ /app/translations_mo/ -RUN pybabel compile -d /app/translations_mo | true && \ - rm -f /app/translations_mo/*/*/*.po +RUN if [ "$BUILD_TRANSLATIONS" = "true" ]; then \ + pybabel compile -d /app/translations_mo | true; \ + fi; \ + rm -f /app/translations_mo/*/*/*.po; \ + rm -f /app/translations_mo/*/*/*.json; ###################################################################### # Python APP common layer @@ -175,6 +185,11 @@ RUN /app/docker/apt-install.sh \ # Copy compiled things from previous stages COPY --from=superset-node /app/superset/static/assets superset/static/assets +# TODO, when the next version comes out, use --exclude superset/translations +COPY superset superset +# TODO in the meantime, remove the .po files +RUN rm superset/translations/*/*/*.po + # Merging translations from backend and frontend stages COPY --from=superset-node /app/superset/translations superset/translations COPY --from=python-translation-compiler 
/app/translations_mo superset/translations @@ -187,12 +202,13 @@ EXPOSE ${SUPERSET_PORT} # Final lean image... ###################################################################### FROM python-common AS lean -COPY superset superset # Install Python dependencies using docker/pip-install.sh COPY requirements/base.txt requirements/ -RUN --mount=type=cache,target=/root/.cache/pip \ - /app/docker/pip-install.sh --requires-build-essential -r requirements/base.txt && \ +RUN --mount=type=cache,target=/root/.cache/uv \ + /app/docker/pip-install.sh --requires-build-essential -r requirements/base.txt +# Install the superset package +RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install . RUN python -m compileall /app/superset @@ -203,7 +219,6 @@ USER superset # Dev image... ###################################################################### FROM python-common AS dev -COPY superset superset # Debian libs needed for dev RUN /app/docker/apt-install.sh \ @@ -214,8 +229,10 @@ RUN /app/docker/apt-install.sh \ # Copy development requirements and install them COPY requirements/*.txt requirements/ # Install Python dependencies using docker/pip-install.sh -RUN --mount=type=cache,target=/root/.cache/pip \ - /app/docker/pip-install.sh --requires-build-essential -r requirements/development.txt && \ +RUN --mount=type=cache,target=/root/.cache/uv \ + /app/docker/pip-install.sh --requires-build-essential -r requirements/development.txt +# Install the superset package +RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install . RUN python -m compileall /app/superset diff --git a/docker-compose.yml b/docker-compose.yml index 32355dbad2daa..e248e973e6355 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -35,11 +35,14 @@ x-superset-volumes: &superset-volumes x-common-build: &common-build context: . 
- target: dev + target: ${SUPERSET_BUILD_TARGET:-dev} # can use `dev` (default) or `lean` cache_from: - apache/superset-cache:3.10-slim-bookworm args: DEV_MODE: "true" + INCLUDE_CHROMIUM: ${INCLUDE_CHROMIUM:-false} + INCLUDE_FIREFOX: ${INCLUDE_FIREFOX:-false} + BUILD_TRANSLATIONS: ${BUILD_TRANSLATIONS:-false} services: nginx: @@ -157,6 +160,7 @@ services: # and build it on startup while firing docker-frontend.sh in dev mode, where # it'll mount and watch local files and rebuild as you update them DEV_MODE: "true" + BUILD_TRANSLATIONS: ${BUILD_TRANSLATIONS:-false} environment: # set this to false if you have perf issues running the npm i; npm run dev in-docker # if you do so, you have to run this manually on the host, which should perform better! diff --git a/docker/docker-bootstrap.sh b/docker/docker-bootstrap.sh index 1a4e04be94e7a..1338e0cea5dd7 100755 --- a/docker/docker-bootstrap.sh +++ b/docker/docker-bootstrap.sh @@ -30,12 +30,16 @@ if [ "$CYPRESS_CONFIG" == "true" ]; then export SUPERSET_TESTENV=true export SUPERSET__SQLALCHEMY_DATABASE_URI=postgresql+psycopg2://superset:superset@db:5432/superset fi +if [[ "$DATABASE_DIALECT" == postgres* ]] ; then + echo "Installing postgres requirements" + uv pip install -e .[postgres] +fi # # Make sure we have dev requirements installed # if [ -f "${REQUIREMENTS_LOCAL}" ]; then echo "Installing local overrides at ${REQUIREMENTS_LOCAL}" - pip install --no-cache-dir -r "${REQUIREMENTS_LOCAL}" + uv pip install --no-cache-dir -r "${REQUIREMENTS_LOCAL}" else echo "Skipping local overrides" fi diff --git a/docker/docker-frontend.sh b/docker/docker-frontend.sh index b80e12a0ca79a..f851576730fce 100755 --- a/docker/docker-frontend.sh +++ b/docker/docker-frontend.sh @@ -35,7 +35,7 @@ if [ "$BUILD_SUPERSET_FRONTEND_IN_DOCKER" = "true" ]; then echo "Running `npm install`" npm install - echo "Running frontend" + echo "Start webpack dev server" npm run dev else diff --git a/docs/docs/contributing/development.mdx 
b/docs/docs/contributing/development.mdx index a20e1e246fd6c..9ecce26fcb557 100644 --- a/docs/docs/contributing/development.mdx +++ b/docs/docs/contributing/development.mdx @@ -72,6 +72,19 @@ documentation. configured to be secure. ::: +### Supported environment variables + +Affecting the Docker build process: +- **SUPERSET_BUILD_TARGET (default=dev):** which `--target` to build; `dev` and `lean` are the commonly used values +- **INCLUDE_FIREFOX (default=false):** whether to include the Firefox headless browser in the build +- **INCLUDE_CHROMIUM (default=false):** whether to include the Chromium headless browser in the build +- **BUILD_TRANSLATIONS (default=false):** whether to compile the translations from the .po files available + +For more env vars that affect your configuration, see this +[superset_config.py](https://github.com/apache/superset/blob/master/docker/pythonpath_dev/superset_config.py) +used in the `docker compose` context to assign env vars to the superset configuration. + + ### Nuking the postgres database At times, it's possible to end up with your development database in a bad state, it's diff --git a/requirements/development.in b/requirements/development.in index e48d78f1d37ea..c82f209238fcb 100644 --- a/requirements/development.in +++ b/requirements/development.in @@ -17,4 +17,4 @@ # under the License. 
# -r base.in --e .[development,bigquery,cors,druid,gevent,gsheets,mysql,playwright,postgres,presto,prophet,trino,thumbnails] +-e .[development,bigquery,cors,druid,gevent,gsheets,mysql,postgres,presto,prophet,trino,thumbnails] diff --git a/requirements/development.txt b/requirements/development.txt index 1afbd47d0c213..41fc32b9e93eb 100644 --- a/requirements/development.txt +++ b/requirements/development.txt @@ -1,4 +1,4 @@ -# SHA1:dc767a7288b56c785b0cd3c38e95e7b5e66be1ac +# SHA1:9a543d5cd98c8ac766ad556ef5bd59de47834c9e # # This file is autogenerated by pip-compile-multi # To update, run: @@ -113,8 +113,6 @@ pip-compile-multi==2.6.3 # via apache-superset pip-tools==7.4.1 # via pip-compile-multi -playwright==1.42.0 - # via apache-superset pluggy==1.5.0 # via pytest pre-commit==4.0.1 @@ -142,8 +140,6 @@ pydata-google-auth==1.9.0 # via pandas-gbq pydruid==0.6.9 # via apache-superset -pyee==11.0.1 - # via playwright pyfakefs==5.3.5 # via apache-superset pyhive[presto]==0.7.0 diff --git a/superset-frontend/webpack.config.js b/superset-frontend/webpack.config.js index 10bf4d49eec1d..59aa6fb4e9db5 100644 --- a/superset-frontend/webpack.config.js +++ b/superset-frontend/webpack.config.js @@ -526,7 +526,7 @@ const config = { 'react/lib/ReactContext': true, }, plugins, - devtool: 'source-map', + devtool: isDevMode ? 'eval-cheap-module-source-map' : false, }; // find all the symlinked plugins and use their source code for imports @@ -544,7 +544,6 @@ console.log(''); // pure cosmetic new line let proxyConfig = getProxyConfig(); if (isDevMode) { - config.devtool = 'eval-cheap-module-source-map'; config.devServer = { onBeforeSetupMiddleware(devServer) { // load proxy config when manifest updates diff --git a/superset/__init__.py b/superset/__init__.py index c97580b122cea..cbab58e0d2c90 100644 --- a/superset/__init__.py +++ b/superset/__init__.py @@ -34,8 +34,7 @@ # All of the fields located here should be considered legacy. 
The correct way # to declare "global" dependencies is to define it in extensions.py, # then initialize it in app.create_app(). These fields will be removed -# in subsequent PRs as things are migrated towards the factory -# pattern +# in subsequent PRs as things are migrated towards the factory pattern app: Flask = current_app cache = cache_manager.cache conf = LocalProxy(lambda: current_app.config)