diff --git a/.github/workflows/build-internal.yaml b/.github/workflows/build-internal.yaml new file mode 100644 index 0000000000..060245451f --- /dev/null +++ b/.github/workflows/build-internal.yaml @@ -0,0 +1,196 @@ +# Copyright 2023 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +name: Build (internal) + +run-name: Building ${{ inputs.version }} ${{ github.ref_name }} + +permissions: + + # Allow the action to upload images to ghcr + packages: write + +on: + + # FYI + # The event payload in the called workflow is the same event payload from the calling workflow + # The inputs are the inputs defined in the called workflow (by the "with") + workflow_call: + inputs: + docker_registries: + description: 'Comma separated list of docker registries to push images to (default: ghcr.io/, use registry.hub.docker.com/ for docker hub)' + default: 'ghcr.io/' + type: string + docker_repo: + description: 'Docker repo to push images to (default: lowercase github repository owner name)' + default: '' + type: string + version: + description: 'The version to build, without prefix v (e.g. 
1.1.0), if not provided version will be <unstable-version-prefix>+<commit-hash>, where <unstable-version-prefix> is taken from automation/version/unstable_version_prefix' + default: '' + type: string + skip_images: + description: 'Comma separated list of images to skip building, example with all possible images: mlrun,api,base,models,models-gpu,jupyter,test' + default: '' + type: string + build_from_cache: + description: 'Whether to build images from cache or not. Default: true, set to false only if required because that will cause a significant increase in build time' + default: 'true' + type: string + +jobs: + matrix_prep: + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - uses: actions/checkout@v3 + - id: set-matrix + uses: ./.github/actions/image-matrix-prep + with: + skip_images: ${{ inputs.skip_images }} + + build-images: + name: Build and push image - ${{ matrix.image-name }} (Python ${{ matrix.python-version }}) + runs-on: ubuntu-latest + needs: matrix_prep + strategy: + fail-fast: false + matrix: ${{fromJson(needs.matrix_prep.outputs.matrix)}} + steps: + - uses: actions/checkout@v3 + + # since github-actions gives us 14G only, and fills it up with some garbage + - name: Freeing up disk space + run: | + "${GITHUB_WORKSPACE}/automation/scripts/github_workflow_free_space.sh" + + - name: Install curl and jq + run: sudo apt-get install curl jq + + - name: Extract git hash, ref and latest version + id: git_info + run: | + echo "mlrun_commit_hash=$(git rev-parse --short=8 $GITHUB_SHA)" >> $GITHUB_OUTPUT + echo "unstable_version_prefix=$(cat automation/version/unstable_version_prefix)" >> $GITHUB_OUTPUT + + - name: Resolve docker cache tag + id: docker_cache + run: | + export version_suffix=$(echo "$GITHUB_REF_NAME" | grep -E "^[0-9]+\.[0-9]+\.x$" | tr -d '.'); + export unstable_tag=$(if [ -z "$version_suffix" ]; then echo "unstable-cache"; else echo "unstable-cache-$version_suffix";fi); + export build_from_cache=$(if [ -z "$INPUT_BUILD_FROM_CACHE" ]; then echo "true" ; else echo 
"$INPUT_BUILD_FROM_CACHE";fi); + export no_cache=$(if [ "$build_from_cache" = "false" ]; then echo "true" ; else echo "";fi); + echo "tag=$(echo $unstable_tag)" >> $GITHUB_OUTPUT + echo "no_cache=$(echo $no_cache)" >> $GITHUB_OUTPUT + env: + INPUT_BUILD_FROM_CACHE: ${{ inputs.build_from_cache }} + + - name: Set computed versions params + id: computed_params + run: | + echo "mlrun_version=$( \ + input_mlrun_version=$INPUT_VERSION && \ + default_mlrun_version=$(echo ${{ steps.git_info.outputs.unstable_version_prefix }}+${{ steps.git_info.outputs.mlrun_commit_hash }}) && \ + echo ${input_mlrun_version:-`echo $default_mlrun_version`})" >> $GITHUB_OUTPUT + echo "mlrun_docker_repo=$( \ + input_docker_repo=$INPUT_DOCKER_VERSION && \ + default_docker_repo=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]') && \ + echo ${input_docker_repo:-`echo $default_docker_repo`})" >> $GITHUB_OUTPUT + echo "mlrun_docker_registries=$( \ + input_docker_registries=$INPUT_DOCKER_REGISTRIES && \ + echo ${input_docker_registries:-ghcr.io/})" >> $GITHUB_OUTPUT + echo "mlrun_cache_date=$(date +%s)" >> $GITHUB_OUTPUT + env: + INPUT_VERSION: ${{ inputs.version }} + INPUT_DOCKER_VERSION: ${{ inputs.docker_repo }} + INPUT_DOCKER_REGISTRIES: ${{ inputs.docker_registries }} + + - name: Docker login (ghcr) + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Docker login (quay.io) + continue-on-error: true + uses: docker/login-action@v2 + with: + registry: quay.io + username: ${{ secrets.QUAY_IO_DOCKER_REGISTRY_USERNAME }} + password: ${{ secrets.QUAY_IO_DOCKER_REGISTRY_PASSWORD }} + + - name: Docker login (docker.com) + continue-on-error: true + uses: docker/login-action@v2 + with: + registry: registry.hub.docker.com + username: ${{ secrets.DOCKER_HUB_DOCKER_REGISTRY_USERNAME }} + password: ${{ secrets.DOCKER_HUB_DOCKER_REGISTRY_PASSWORD }} + + - name: Pull cache, build and push image 
+ + # we don't really want per-commit test image we just want to build and push the cache image so CI will be able + # to use it and run much faster + if: ${{ matrix.image-name != 'test' }} + run: | + for registry in $(echo ${{ steps.computed_params.outputs.mlrun_docker_registries }} | sed "s/,/ /g"); \ + do \ + MLRUN_CACHE_DATE=${{ steps.computed_params.outputs.mlrun_cache_date }} \ + MLRUN_DOCKER_REGISTRY=$registry \ + MLRUN_DOCKER_CACHE_FROM_REGISTRY=ghcr.io/ \ + MLRUN_DOCKER_REPO=${{ steps.computed_params.outputs.mlrun_docker_repo }} \ + MLRUN_VERSION=${{ steps.computed_params.outputs.mlrun_version }} \ + MLRUN_DOCKER_CACHE_FROM_TAG=${{ steps.docker_cache.outputs.tag }} \ + MLRUN_NO_CACHE=${{ steps.docker_cache.outputs.no_cache }} \ + MLRUN_PUSH_DOCKER_CACHE_IMAGE="true" \ + MLRUN_PYTHON_VERSION=${{ matrix.python-version }} \ + INCLUDE_PYTHON_VERSION_SUFFIX=${{ matrix.include-suffix }} \ + make push-${{ matrix.image-name }}; \ + done; + + - name: Build and push unstable tag + + # we don't need to have unstable tag for the test image + # And we don't need to run this when triggered manually (workflow dispatch) + if: matrix.image-name != 'test' && github.event_name != 'workflow_dispatch' && github.ref_name == 'development' + run: | + for registry in "ghcr.io/" "quay.io/" "registry.hub.docker.com/"; \ + do \ + MLRUN_CACHE_DATE=${{ steps.computed_params.outputs.mlrun_cache_date }} \ + MLRUN_DOCKER_REGISTRY=$registry \ + MLRUN_DOCKER_CACHE_FROM_REGISTRY=ghcr.io/ \ + MLRUN_DOCKER_REPO=${{ steps.computed_params.outputs.mlrun_docker_repo }} \ + MLRUN_VERSION=unstable \ + MLRUN_DOCKER_CACHE_FROM_TAG=${{ steps.docker_cache.outputs.tag }} \ + MLRUN_PYTHON_VERSION=${{ matrix.python-version }} \ + INCLUDE_PYTHON_VERSION_SUFFIX=${{ matrix.include-suffix }} \ + make push-${{ matrix.image-name }}; \ + done; + - name: Pull cache, build and push test image + # When version is given we're probably in a release process, we don't need the test image in that case + if: 
matrix.image-name == 'test' && inputs.version == '' + run: | + MLRUN_CACHE_DATE=${{ steps.computed_params.outputs.mlrun_cache_date }} \ + MLRUN_DOCKER_REGISTRY=ghcr.io/ \ + MLRUN_DOCKER_CACHE_FROM_REGISTRY=ghcr.io/ \ + MLRUN_DOCKER_REPO=${{ steps.computed_params.outputs.mlrun_docker_repo }} \ + MLRUN_VERSION=${{ steps.docker_cache.outputs.tag }} \ + MLRUN_DOCKER_CACHE_FROM_TAG=${{ steps.docker_cache.outputs.tag }} \ + MLRUN_PUSH_DOCKER_CACHE_IMAGE=true \ + MLRUN_PYTHON_VERSION=${{ matrix.python-version }} \ + INCLUDE_PYTHON_VERSION_SUFFIX=${{ matrix.include-suffix }} \ + make push-${{ matrix.image-name }} diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index db1386eb6a..1a98376d7f 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. # + # This name is referenced in the release.yaml workflow, if you're changing here - change there name: Build + run-name: Building ${{ inputs.version }} ${{ github.ref_name }} +permissions: + + # Allow the action to upload images to ghcr + packages: write + on: push: branches: @@ -44,137 +51,20 @@ on: description: 'Whether to build images from cache or not. 
Default: true, set to false only if required because that will cause a significant increase in build time' required: true default: 'true' + type: choice + options: + - 'true' + - 'false' jobs: - matrix_prep: - runs-on: ubuntu-latest - outputs: - matrix: ${{ steps.set-matrix.outputs.matrix }} - steps: - - uses: actions/checkout@v3 - - id: set-matrix - uses: ./.github/actions/image-matrix-prep - with: - skip_images: ${{ github.event.inputs.skip_images }} - - build-images: - name: Build and push image - ${{ matrix.image-name }} (Python ${{ matrix.python-version }}) - runs-on: ubuntu-latest - needs: matrix_prep - - # let's not run this on every fork, change to your fork when developing + build-mlrun: if: github.repository == 'mlrun/mlrun' || github.event_name == 'workflow_dispatch' - - strategy: - fail-fast: false - matrix: ${{fromJson(needs.matrix_prep.outputs.matrix)}} - steps: - - uses: actions/checkout@v3 - - # since github-actions gives us 14G only, and fills it up with some garbage - - name: Freeing up disk space - run: | - "${GITHUB_WORKSPACE}/automation/scripts/github_workflow_free_space.sh" - - - name: Install curl and jq - run: sudo apt-get install curl jq - - name: Extract git hash, ref and latest version - id: git_info - run: | - echo "mlrun_commit_hash=$(git rev-parse --short=8 $GITHUB_SHA)" >> $GITHUB_OUTPUT - echo "unstable_version_prefix=$(cat automation/version/unstable_version_prefix)" >> $GITHUB_OUTPUT - - name: Resolve docker cache tag - id: docker_cache - run: | - export version_suffix=$(echo "$GITHUB_REF_NAME" | grep -E "^[0-9]+\.[0-9]+\.x$" | tr -d '.'); - export unstable_tag=$(if [ -z "$version_suffix" ]; then echo "unstable-cache"; else echo "unstable-cache-$version_suffix";fi); - export build_from_cache=$(if [ -z "$INPUT_BUILD_FROM_CACHE" ]; then echo "true" ; else echo "$INPUT_BUILD_FROM_CACHE";fi); - export no_cache=$(if [ "$build_from_cache" = "false" ]; then echo "true" ; else echo "";fi); - echo "tag=$(echo $unstable_tag)" >> 
$GITHUB_OUTPUT - echo "no_cache=$(echo $no_cache)" >> $GITHUB_OUTPUT - env: - INPUT_BUILD_FROM_CACHE: ${{ github.event.inputs.build_from_cache }} - - name: Set computed versions params - id: computed_params - run: | - echo "mlrun_version=$( \ - input_mlrun_version=$INPUT_VERSION && \ - default_mlrun_version=$(echo ${{ steps.git_info.outputs.unstable_version_prefix }}+${{ steps.git_info.outputs.mlrun_commit_hash }}) && \ - echo ${input_mlrun_version:-`echo $default_mlrun_version`})" >> $GITHUB_OUTPUT - echo "mlrun_docker_repo=$( \ - input_docker_repo=$INPUT_DOCKER_VERSION && \ - default_docker_repo=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]') && \ - echo ${input_docker_repo:-`echo $default_docker_repo`})" >> $GITHUB_OUTPUT - echo "mlrun_docker_registries=$( \ - input_docker_registries=$INPUT_DOCKER_REGISTRIES && \ - echo ${input_docker_registries:-ghcr.io/})" >> $GITHUB_OUTPUT - echo "mlrun_cache_date=$(date +%s)" >> $GITHUB_OUTPUT - env: - INPUT_VERSION: ${{ github.event.inputs.version }} - INPUT_DOCKER_VERSION: ${{ github.event.inputs.docker_repo }} - INPUT_DOCKER_REGISTRIES: ${{ github.event.inputs.docker_registries }} - - name: Docker login - # all suffixed with "| true" to allow failures if secrets are not defined (fork) - run: | - echo ${{ secrets.GHCR_DOCKER_REGISTRY_PASSWORD }} | \ - docker login ghcr.io -u ${{ secrets.GHCR_DOCKER_REGISTRY_USERNAME }} --password-stdin | true - echo ${{ secrets.DOCKER_HUB_DOCKER_REGISTRY_PASSWORD }} | \ - docker login registry.hub.docker.com -u ${{ secrets.DOCKER_HUB_DOCKER_REGISTRY_USERNAME }} \ - --password-stdin | true - echo ${{ secrets.QUAY_IO_DOCKER_REGISTRY_PASSWORD }} | \ - docker login quay.io -u ${{ secrets.QUAY_IO_DOCKER_REGISTRY_USERNAME }} \ - --password-stdin | true - - - name: Pull cache, build and push image - - # we don't really want per-commit test image we just want to build and push the cache image so CI will be able - # to use it and run much faster - if: ${{ matrix.image-name != 
'test' }} - run: | - for registry in $(echo ${{ steps.computed_params.outputs.mlrun_docker_registries }} | sed "s/,/ /g"); \ - do \ - MLRUN_CACHE_DATE=${{ steps.computed_params.outputs.mlrun_cache_date }} \ - MLRUN_DOCKER_REGISTRY=$registry \ - MLRUN_DOCKER_CACHE_FROM_REGISTRY=ghcr.io/ \ - MLRUN_DOCKER_REPO=${{ steps.computed_params.outputs.mlrun_docker_repo }} \ - MLRUN_VERSION=${{ steps.computed_params.outputs.mlrun_version }} \ - MLRUN_DOCKER_CACHE_FROM_TAG=${{ steps.docker_cache.outputs.tag }} \ - MLRUN_NO_CACHE=${{ steps.docker_cache.outputs.no_cache }} \ - MLRUN_PUSH_DOCKER_CACHE_IMAGE="true" \ - MLRUN_PYTHON_VERSION=${{ matrix.python-version }} \ - INCLUDE_PYTHON_VERSION_SUFFIX=${{ matrix.include-suffix }} \ - make push-${{ matrix.image-name }}; \ - done; - - - name: Build and push unstable tag - - # we don't need to have unstable tag for the test image - # And we don't need to run this when triggered manually (workflow dispatch) - if: matrix.image-name != 'test' && github.event_name != 'workflow_dispatch' && github.ref_name == 'development' - run: | - for registry in "ghcr.io/" "quay.io/" "registry.hub.docker.com/"; \ - do \ - MLRUN_CACHE_DATE=${{ steps.computed_params.outputs.mlrun_cache_date }} \ - MLRUN_DOCKER_REGISTRY=$registry \ - MLRUN_DOCKER_CACHE_FROM_REGISTRY=ghcr.io/ \ - MLRUN_DOCKER_REPO=${{ steps.computed_params.outputs.mlrun_docker_repo }} \ - MLRUN_VERSION=unstable \ - MLRUN_DOCKER_CACHE_FROM_TAG=${{ steps.docker_cache.outputs.tag }} \ - MLRUN_PYTHON_VERSION=${{ matrix.python-version }} \ - INCLUDE_PYTHON_VERSION_SUFFIX=${{ matrix.include-suffix }} \ - make push-${{ matrix.image-name }}; \ - done; - - name: Pull cache, build and push test image - # When version is given we're probably in a release process, we don't need the test image in that case - if: matrix.image-name == 'test' && github.event.inputs.version == '' - run: | - MLRUN_CACHE_DATE=${{ steps.computed_params.outputs.mlrun_cache_date }} \ - MLRUN_DOCKER_REGISTRY=ghcr.io/ \ - 
MLRUN_DOCKER_CACHE_FROM_REGISTRY=ghcr.io/ \ - MLRUN_DOCKER_REPO=${{ steps.computed_params.outputs.mlrun_docker_repo }} \ - MLRUN_VERSION=${{ steps.docker_cache.outputs.tag }} \ - MLRUN_DOCKER_CACHE_FROM_TAG=${{ steps.docker_cache.outputs.tag }} \ - MLRUN_PUSH_DOCKER_CACHE_IMAGE=true \ - MLRUN_PYTHON_VERSION=${{ matrix.python-version }} \ - INCLUDE_PYTHON_VERSION_SUFFIX=${{ matrix.include-suffix }} \ - make push-${{ matrix.image-name }} + name: Build MLRun + uses: ./.github/workflows/build-internal.yaml + with: + docker_registries: ${{ github.event.inputs.docker_registries }} + docker_repo: ${{ github.event.inputs.docker_repo }} + version: ${{ github.event.inputs.version }} + skip_images: ${{ github.event.inputs.skip_images }} + build_from_cache: ${{ github.event.inputs.build_from_cache }} + secrets: inherit diff --git a/.github/workflows/periodic-rebuild.yaml b/.github/workflows/periodic-rebuild.yaml index 300bf1d2f7..d214558eea 100644 --- a/.github/workflows/periodic-rebuild.yaml +++ b/.github/workflows/periodic-rebuild.yaml @@ -21,15 +21,16 @@ on: jobs: re-build-images: - # let's not run this on every fork, change to your fork when developing - if: github.repository == 'mlrun/mlrun' || github.event_name == 'workflow_dispatch' + if: github.repository == 'mlrun/mlrun' strategy: fail-fast: false matrix: repo: ["mlrun","ui"] - branch: ["development","1.3.x"] + branch: ["development","1.4.x", "1.3.x"] runs-on: ubuntu-latest steps: + + # TODO: move to reusable workflow once all branches have backported with the new workflow - name: Re-Build MLRun Image if: matrix.repo == 'mlrun' uses: convictional/trigger-workflow-and-wait@v1.6.5 diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 45d2635f2a..38599cc666 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -14,76 +14,127 @@ # name: Release + run-name: Releasing ${{ inputs.version }} +permissions: + + # Create release and upload artifact to releases 
+ contents: write + + # Allow the action to upload images to ghcr + packages: write + on: workflow_dispatch: inputs: version: - description: 'The version to release, without prefix v (e.g. 1.1.0-rc10)' - required: true - previous_version: - description: 'The previous version, without prefix v (e.g. 1.1.0-rc9)' - required: true - pre_release: - description: 'Whether to mark release as pre-release or not (default: false)' - required: false - default: 'true' - generate_release_notes: - description: 'Whether to generate release notes or not (default: true)' - required: false - default: 'true' + description: 'The version to release, without prefix v (e.g. 1.1.0-rc10). if not provided, will be calculated from the current version and bump_version_mode' + default: '' + type: string + bump_version_mode: + description: 'The version bump mode. Whether to bump rc version or set stable version' + default: 'bump-rc' + type: choice + options: [ 'bump-rc', 'stable' ] skip_images: description: 'Comma separated list of images to skip building, example with all possible images: mlrun,ui,api,base,models,models-gpu,jupyter,test' required: false default: '' skip_publish_pypi: - description: 'Whether to skip publishing the python package to Pypi, (true/false)' + description: 'Whether to skip publishing the python package to Pypi' required: false default: 'false' + type: choice + options: ['true', 'false'] skip_create_tag_release: - description: 'Whether to skip creating tag & release in Github, (true/false)' + description: 'Whether to skip creating tag & release in Github' required: false default: 'false' + type: choice + options: + - 'true' + - 'false' + ui_ref: + description: 'The UI reference (branch / tag name) to use for the UI image, (development, 1.3.3, etc)' + required: false + default: '' jobs: - trigger-and-wait-for-mlrun-image-building: - name: Trigger build workflow in mlrun/mlrun and wait to finish + prepare-inputs: + name: Prepare inputs runs-on: ubuntu-latest - + outputs: 
+ version: ${{ steps.resolve.outputs.version }} + previous_version: ${{ steps.resolve.outputs.previous_version }} + ui_ref: ${{ steps.resolve.outputs.ui_ref }} + # read by create-releases to set the release's prerelease flag + is_stable_version: ${{ steps.resolve.outputs.is_stable_version }} steps: - - uses: convictional/trigger-workflow-and-wait@v1.6.5 + - uses: actions/checkout@v3 with: - owner: mlrun - repo: mlrun - github_token: ${{ secrets.RELEASE_GITHUB_ACCESS_TOKEN }} - workflow_file_name: build.yaml - ref: ${{ github.ref_name }} - wait_interval: 60 - client_payload: '{"docker_registries": "ghcr.io/,quay.io/,registry.hub.docker.com/", "version": "${{ github.event.inputs.version }}", "skip_images": "${{ github.event.inputs.skip_images }}"}' + # Fetch all history for all tags and branches + fetch-depth: 0 + - name: Resolve inputs + id: resolve + run: | + + # map the input to the actual mode + declare -A bump_version_mode=(["bump-rc"]="rc" ["stable"]="rc-grad") + NEXT_VERSION_MODE=${bump_version_mode[$BUMP_VERSION_MODE_INPUT]} + version=$(python ./automation/version/version_file.py next-version --mode $NEXT_VERSION_MODE) + echo "Calculated version: $version" + if [[ -n "$VERSION_INPUT" ]]; then \ + version=$VERSION_INPUT; \ + echo "Using version from input: $version"; \ + fi + + echo "is_stable_version=$(python ./automation/version/version_file.py is-stable $version)" >> $GITHUB_OUTPUT + echo "version=$version" >> $GITHUB_OUTPUT + echo "previous_version=$(python ./automation/version/version_file.py current-version)" >> $GITHUB_OUTPUT + echo "ui_ref=${UI_REF_INPUT:-`echo ${{ github.ref_name }}`}" >> $GITHUB_OUTPUT + cat $GITHUB_OUTPUT + env: + UI_REF_INPUT: ${{ github.event.inputs.ui_ref }} + BUMP_VERSION_MODE_INPUT: ${{ github.event.inputs.bump_version_mode }} + VERSION_INPUT: ${{ github.event.inputs.version }} + + trigger-and-wait-for-mlrun-image-building: + name: Trigger build workflow in mlrun/mlrun and wait to finish + needs: prepare-inputs + uses: ./.github/workflows/build-internal.yaml + with: + docker_registries: "ghcr.io/,quay.io/,registry.hub.docker.com/" + version: ${{ 
needs.prepare-inputs.outputs.version }} + skip_images: ${{ github.event.inputs.skip_images }} + secrets: inherit + + # TODO: Move to reusable workflow too. + # Requires cross-repo validation for passing github token as secret trigger-and-wait-for-ui-image-building: name: Trigger build workflow in mlrun/ui and wait to finish runs-on: ubuntu-latest - if: ${{ !contains(github.event.inputs.skip_images, 'ui') }} - + needs: prepare-inputs steps: - uses: convictional/trigger-workflow-and-wait@v1.6.5 + + # since some steps rely on the ui image, we need to wait for it to finish building + # the condition is here and not on job because some other jobs "needs" this job to be done (and not skipped) + if: ${{ !contains(github.event.inputs.skip_images, 'ui') }} with: - owner: mlrun + owner: ${{ github.repository_owner }} repo: ui github_token: ${{ secrets.RELEASE_GITHUB_ACCESS_TOKEN }} workflow_file_name: build.yaml - ref: ${{ github.ref_name }} + ref: ${{ needs.prepare-inputs.outputs.ui_ref }} wait_interval: 60 - client_payload: '{"docker_registries": "ghcr.io/,quay.io/,registry.hub.docker.com/", "version": "${{ github.event.inputs.version }}"}' + client_payload: '{"docker_registries": "ghcr.io/,quay.io/,registry.hub.docker.com/", "version": "${{ needs.prepare-inputs.outputs.version }}"}' publish-to-pypi: name: Publish package to pypi runs-on: ubuntu-latest - if: github.event.inputs.skip_publish_pypi != 'true' + # publishing to pypi is (kind of) irreversible, therefore do it only if both previous steps finished successfully - needs: [trigger-and-wait-for-ui-image-building, trigger-and-wait-for-mlrun-image-building] + needs: [ prepare-inputs, trigger-and-wait-for-ui-image-building, trigger-and-wait-for-mlrun-image-building ] steps: - uses: actions/checkout@v3 - name: Set up python @@ -91,41 +142,46 @@ jobs: with: python-version: 3.9 cache: pip + - name: Install dependencies + run: | + pip install -r automation/requirements.txt - name: Build & push to pypi + if: 
github.event.inputs.skip_publish_pypi != 'true' run: | pip install twine wheel export TWINE_USERNAME=${{ secrets.PYPI_USERNAME }} export TWINE_PASSWORD=${{ secrets.PYPI_PASSWORD }} MLRUN_VERSION="$INPUT_VERSION" make publish-package env: - INPUT_VERSION: ${{ github.event.inputs.version }} + INPUT_VERSION: ${{ needs.prepare-inputs.outputs.version }} create-releases: - name: Create release & tag v${{ github.event.inputs.version }} + name: Create release & tag v${{ needs.prepare-inputs.outputs.version }} runs-on: ubuntu-latest - if: github.event.inputs.skip_create_tag_release != 'true' - needs: publish-to-pypi + needs: [ prepare-inputs, publish-to-pypi ] steps: - uses: ncipollo/release-action@v1 + if: github.event.inputs.skip_create_tag_release != 'true' with: - tag: v${{ github.event.inputs.version }} + tag: v${{ needs.prepare-inputs.outputs.version }} commit: ${{ github.ref_name }} - token: ${{ secrets.RELEASE_GITHUB_ACCESS_TOKEN }} - prerelease: ${{ github.event.inputs.pre_release }} + token: ${{ secrets.GITHUB_TOKEN }} + prerelease: ${{ needs.prepare-inputs.outputs.is_stable_version == 'false' }} - uses: ncipollo/release-action@v1 + if: github.event.inputs.skip_create_tag_release != 'true' with: repo: ui - tag: v${{ github.event.inputs.version }} + tag: v${{ needs.prepare-inputs.outputs.version }} commit: ${{ github.ref_name }} token: ${{ secrets.RELEASE_GITHUB_ACCESS_TOKEN }} # experienced 500 errors when trying to create release notes for ui repo with `prerelease flag` - # prerelease: ${{ github.event.inputs.pre_release }} + # prerelease: ${{ needs.prepare-inputs.outputs.prerelease }} update-release-notes: name: Update release notes runs-on: ubuntu-latest - if: github.event.inputs.generate_release_notes == 'true' - needs: create-releases + if: github.event.inputs.skip_create_tag_release != 'true' + needs: [ prepare-inputs, create-releases ] steps: - uses: actions/checkout@v3 with: @@ -141,29 +197,32 @@ jobs: - name: Generate release notes id: release-notes run: 
| - make release-notes MLRUN_OLD_VERSION="v$INPUT_PREVIOUS_VERSION" MLRUN_VERSION="v$INPUT_VERSION" MLRUN_RELEASE_BRANCH=${{ github.ref_name }} MLRUN_RAISE_ON_ERROR=false MLRUN_RELEASE_NOTES_OUTPUT_FILE=release_notes.md MLRUN_SKIP_CLONE=true + make release-notes env: - INPUT_PREVIOUS_VERSION: ${{ github.event.inputs.previous_version }} - INPUT_VERSION: ${{ github.event.inputs.version }} + MLRUN_SKIP_CLONE: true + MLRUN_RELEASE_BRANCH: ${{ github.ref_name }} + MLRUN_RELEASE_NOTES_OUTPUT_FILE: release_notes.md + MLRUN_RAISE_ON_ERROR: false + MLRUN_OLD_VERSION: "v${{ needs.prepare-inputs.outputs.previous_version }}" + MLRUN_VERSION: "v${{ needs.prepare-inputs.outputs.version }}" + - name: resolve release notes id: resolve-release-notes run: | echo "body<<EOF" >> $GITHUB_OUTPUT cat release_notes.md >> $GITHUB_OUTPUT echo "EOF" >> $GITHUB_OUTPUT + - uses: ncipollo/release-action@v1 with: - tag: v${{ github.event.inputs.version }} - commit: ${{ github.ref_name }} - token: ${{ secrets.RELEASE_GITHUB_ACCESS_TOKEN }} - allowUpdates: true - prerelease: ${{ github.event.inputs.pre_release }} + tag: v${{ needs.prepare-inputs.outputs.version }} body: ${{ steps.resolve-release-notes.outputs.body }} - - + token: ${{ secrets.GITHUB_TOKEN }} + allowUpdates: true + update-tutorials: name: Bundle tutorials - needs: create-releases + needs: [ prepare-inputs, create-releases ] runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 @@ -174,6 +233,6 @@ jobs: uses: ncipollo/release-action@v1 with: allowUpdates: true - tag: v${{ github.event.inputs.version }} - token: ${{ secrets.RELEASE_GITHUB_ACCESS_TOKEN }} + tag: v${{ needs.prepare-inputs.outputs.version }} + token: ${{ secrets.GITHUB_TOKEN }} artifacts: mlrun-tutorials.tar diff --git a/.github/workflows/security_scan.yaml b/.github/workflows/security_scan.yaml index 67ec5e488b..cd7fd5ce78 100644 --- a/.github/workflows/security_scan.yaml +++ b/.github/workflows/security_scan.yaml @@ -43,10 +43,21 @@ on: description: 'The minimum severity 
of vulnerabilities to report ("negligible", "low", "medium", "high" and "critical".)' required: false default: 'medium' + type: choice + options: + - 'negligible' + - 'low' + - 'medium' + - 'high' + - 'critical' only_fixed: description: 'Whether to scan only fixed vulnerabilities ("true" or "false")' required: false default: 'true' + type: choice + options: + - 'true' + - 'false' jobs: matrix_prep: diff --git a/.github/workflows/system-tests-enterprise.yml b/.github/workflows/system-tests-enterprise.yml index e496243be9..aa80a6e43f 100644 --- a/.github/workflows/system-tests-enterprise.yml +++ b/.github/workflows/system-tests-enterprise.yml @@ -39,6 +39,10 @@ on: description: 'Clean resources created by test (like project) in each test teardown (default: true - perform clean)' required: true default: 'true' + type: choice + options: + - 'true' + - 'false' override_iguazio_version: description: 'Override the configured target system iguazio version (leave empty to resolve automatically)' required: false @@ -162,10 +166,10 @@ jobs: with: python-version: 3.9 cache: pip - - name: Install automation scripts dependencies and add mlrun to dev packages + - name: Install automation scripts dependencies run: | - pip install -r automation/requirements.txt && pip install -e . 
- sudo apt-get install curl jq + pip install -r automation/requirements.txt + sudo apt-get install curl jq gnupg - name: Extract git hashes from upstream and latest version id: git_upstream_info run: | @@ -237,18 +241,12 @@ jobs: --data-cluster-ssh-username "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}" \ --data-cluster-ssh-password "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \ --app-cluster-ssh-password "${{ secrets.LATEST_SYSTEM_TEST_APP_CLUSTER_SSH_PASSWORD }}" \ - --github-access-token "${{ secrets.SYSTEM_TEST_GITHUB_ACCESS_TOKEN }}" \ --provctl-download-url "${{ secrets.LATEST_SYSTEM_TEST_PROVCTL_DOWNLOAD_PATH }}" \ --provctl-download-s3-access-key "${{ secrets.LATEST_SYSTEM_TEST_PROVCTL_DOWNLOAD_URL_S3_ACCESS_KEY }}" \ --provctl-download-s3-key-id "${{ secrets.LATEST_SYSTEM_TEST_PROVCTL_DOWNLOAD_URL_S3_KEY_ID }}" \ - --mlrun-dbpath "${{ secrets.LATEST_SYSTEM_TEST_MLRUN_DB_PATH }}" \ - --webapi-direct-url "${{ secrets.LATEST_SYSTEM_TEST_WEBAPI_DIRECT_URL }}" \ - --framesd-url "${{ secrets.LATEST_SYSTEM_TEST_FRAMESD_URL }}" \ --username "${{ secrets.LATEST_SYSTEM_TEST_USERNAME }}" \ --access-key "${{ secrets.LATEST_SYSTEM_TEST_ACCESS_KEY }}" \ --iguazio-version "${{ steps.computed_params.outputs.iguazio_version }}" \ - --spark-service "${{ secrets.LATEST_SYSTEM_TEST_SPARK_SERVICE }}" \ - --slack-webhook-url "${{ secrets.LATEST_SYSTEM_TEST_SLACK_WEBHOOK_URL }}" \ --mysql-user "${{ secrets.LATEST_SYSTEM_TEST_MYSQL_USER }}" \ --mysql-password "${{ secrets.LATEST_SYSTEM_TEST_MYSQL_PASSWORD }}" \ --purge-db \ @@ -258,6 +256,37 @@ jobs: --override-mlrun-images \ "${{ steps.computed_params.outputs.mlrun_docker_registry }}${{ steps.computed_params.outputs.mlrun_docker_repo }}/mlrun-api:${{ steps.computed_params.outputs.mlrun_docker_tag }},ghcr.io/mlrun/mlrun-ui:${{ steps.computed_params.outputs.mlrun_ui_version }},ghcr.io/mlrun/mlrun:${{ steps.computed_params.outputs.mlrun_docker_tag }},ghcr.io/mlrun/ml-models:${{ 
steps.computed_params.outputs.mlrun_docker_tag }},ghcr.io/mlrun/ml-base:${{ steps.computed_params.outputs.mlrun_docker_tag }},ghcr.io/mlrun/log-collector:${{ steps.computed_params.outputs.mlrun_docker_tag }}" + - name: Prepare System Test env.yml and MLRun installation from current branch + timeout-minutes: 5 + run: | + python automation/system_test/prepare.py env \ + --data-cluster-ip "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }}" \ + --data-cluster-ssh-username "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}" \ + --data-cluster-ssh-password "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \ + --mlrun-dbpath "${{ secrets.LATEST_SYSTEM_TEST_MLRUN_DB_PATH }}" \ + --username "${{ secrets.LATEST_SYSTEM_TEST_USERNAME }}" \ + --access-key "${{ secrets.LATEST_SYSTEM_TEST_ACCESS_KEY }}" \ + --slack-webhook-url "${{ secrets.LATEST_SYSTEM_TEST_SLACK_WEBHOOK_URL }}" \ + --branch "${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunBranch }}" \ + --github-access-token "${{ secrets.SYSTEM_TEST_GITHUB_ACCESS_TOKEN }}" \ + --save-to-path "${{ github.workspace }}/env.yml" + + - name: Encrypt file + run: | + gpg \ + --batch \ + --passphrase "${{ env.GPG_PASSPHRASE }}" \ + --output "${{ github.workspace }}/env.yml.gpg" \ + --symmetric "${{ github.workspace }}/env.yml" + env: + GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} + + - name: Upload env file + uses: actions/upload-artifact@v3 + with: + name: env + path: "${{ github.workspace }}/env.yml.gpg" + if-no-files-found: error outputs: mlrunVersion: ${{ steps.computed_params.outputs.mlrun_version }} @@ -285,28 +314,29 @@ jobs: # than the mlrun version we deployed on the previous job (can have features that the resolved branch doesn't have) with: ref: ${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunBranch }} - - name: Set up python - uses: actions/setup-python@v4 + + - uses: actions/download-artifact@v3 with: - python-version: 3.9 - cache: pip - - name: Install automation scripts 
dependencies and add mlrun to dev packages - run: pip install -r automation/requirements.txt && pip install -e . - - name: Install curl and jq - run: sudo apt-get install curl jq - - name: Prepare System Test env.yaml and MLRun installation from current branch - timeout-minutes: 5 + name: env + path: "${{ github.workspace }}/tests/system" + + - name: Install GPG run: | - python automation/system_test/prepare.py env \ - --mlrun-dbpath "${{ secrets.LATEST_SYSTEM_TEST_MLRUN_DB_PATH }}" \ - --webapi-direct-url "${{ secrets.LATEST_SYSTEM_TEST_WEBAPI_DIRECT_URL }}" \ - --framesd-url "${{ secrets.LATEST_SYSTEM_TEST_FRAMESD_URL }}" \ - --username "${{ secrets.LATEST_SYSTEM_TEST_USERNAME }}" \ - --access-key "${{ secrets.LATEST_SYSTEM_TEST_ACCESS_KEY }}" \ - --spark-service "${{ secrets.LATEST_SYSTEM_TEST_SPARK_SERVICE }}" \ - --slack-webhook-url "${{ secrets.LATEST_SYSTEM_TEST_SLACK_WEBHOOK_URL }}" \ - --branch "${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunBranch }}" \ - --github-access-token "${{ secrets.SYSTEM_TEST_GITHUB_ACCESS_TOKEN }}" + sudo apt-get update -qqy && sudo apt-get install -y gnupg + + - name: Decrypt file + run: | + gpg \ + --batch \ + --passphrase "${{ env.GPG_PASSPHRASE }}" \ + --output "${{ github.workspace }}/tests/system/env.yml" \ + --decrypt "${{ github.workspace }}/tests/system/env.yml.gpg" + + # ensure file created + test -f ${{ github.workspace }}/tests/system/env.yml + env: + GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} + - name: Run System Tests run: | MLRUN_SYSTEM_TESTS_CLEAN_RESOURCES="${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunSystemTestsCleanResources }}" \ diff --git a/.github/workflows/system-tests-opensource.yml b/.github/workflows/system-tests-opensource.yml index 5aed437e52..1b61a50e76 100644 --- a/.github/workflows/system-tests-opensource.yml +++ b/.github/workflows/system-tests-opensource.yml @@ -39,10 +39,18 @@ on: description: 'Clean resources created by test (like project) in each test teardown 
(default: true - perform clean)' required: true default: 'true' + type: choice + options: + - 'true' + - 'false' debug_enabled: description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)' required: false default: 'false' + type: choice + options: + - 'true' + - 'false' env: NAMESPACE: mlrun diff --git a/.importlinter b/.importlinter index 275a80e20e..611dd77cd1 100644 --- a/.importlinter +++ b/.importlinter @@ -3,8 +3,8 @@ root_package=mlrun include_external_packages=True -[importlinter:contract:1] -name=common modules shouldn't import other mlrun utilities +[importlinter:contract:mlrun-common] +name=Common modules shouldn't import other mlrun utilities type=forbidden source_modules= mlrun.common @@ -17,16 +17,88 @@ forbidden_modules= mlrun.db mlrun.feature_store mlrun.frameworks + mlrun.launcher mlrun.mlutils mlrun.model_monitoring + mlrun.package + mlrun.platforms + mlrun.projects + mlrun.runtimes + mlrun.serving + mlrun.execution + mlrun.features + mlrun.k8s_utils + mlrun.kfpops + mlrun.lists + mlrun.model + mlrun.render + mlrun.run + mlrun.secrets + +ignore_imports = + mlrun.config -> mlrun.db + mlrun.utils.clones -> mlrun + mlrun.utils.helpers -> mlrun + +[importlinter:contract:mlrun-api] +name=MLRun modules shouldn't import MLRun API +type=forbidden +source_modules= + mlrun.artifacts + mlrun.common + mlrun.data_types + mlrun.datastore + mlrun.db + mlrun.feature_store + mlrun.frameworks + mlrun.launcher + mlrun.mlutils + mlrun.model_monitoring + mlrun.package mlrun.platforms mlrun.projects mlrun.runtimes mlrun.serving mlrun.utils - mlrun.builder mlrun.config mlrun.errors + mlrun.execution + mlrun.features + mlrun.k8s_utils + mlrun.kfpops mlrun.lists mlrun.model + mlrun.render mlrun.run + mlrun.secrets + +forbidden_modules= + mlrun.api + +ignore_imports = + mlrun.utils.model_monitoring -> mlrun.api.crud.secrets + mlrun.feature_store.feature_set -> mlrun.api.api.utils + mlrun.feature_store.ingestion -> 
mlrun.api.api.utils + mlrun.db.sqldb -> mlrun.api.db.sqldb.session + mlrun.db.sqldb -> mlrun.api.crud + mlrun.db.sqldb -> mlrun.api.db.sqldb.db + mlrun.db.sqldb -> mlrun.api.db.base + mlrun.utils.notifications.notification_pusher -> mlrun.api.db.session + mlrun.utils.notifications.notification_pusher -> mlrun.api.db.base + mlrun.runtimes.base -> mlrun.api.crud + mlrun.runtimes.base -> mlrun.api.constants + mlrun.runtimes.base -> mlrun.api.db.base + mlrun.runtimes.daskjob -> mlrun.api.utils.singletons.k8s + mlrun.runtimes.daskjob -> mlrun.api.db.base + mlrun.runtimes.mpijob.v1 -> mlrun.api.db.base + mlrun.runtimes.mpijob.v1alpha1 -> mlrun.api.db.base + mlrun.runtimes.sparkjob.abstract -> mlrun.api.db.base + mlrun.launcher.factory -> mlrun.api.launcher + mlrun.runtimes.utils -> mlrun.api.utils.singletons.k8s + mlrun.model_monitoring.helpers -> mlrun.api.api.utils + mlrun.model_monitoring.helpers -> mlrun.api.api.deps + mlrun.model_monitoring.helpers -> mlrun.api.utils.singletons.k8s + mlrun.model_monitoring.helpers -> mlrun.api.utils.singletons.db + mlrun.model_monitoring.helpers -> mlrun.api.crud.secrets + mlrun.model_monitoring.stores.sql_model_endpoint_store -> mlrun.api.db.sqldb.session + diff --git a/.run/MLRun API.run.xml b/.run/MLRun API.run.xml new file mode 100644 index 0000000000..40c1893b35 --- /dev/null +++ b/.run/MLRun API.run.xml @@ -0,0 +1,26 @@ + + + + + \ No newline at end of file diff --git a/Makefile b/Makefile index e8ffe4d552..05052d5e18 100644 --- a/Makefile +++ b/Makefile @@ -20,14 +20,6 @@ MLRUN_VERSION ?= unstable # version for the python package with 0.0.0+ # if the provided version includes a "+" we replace it with "-" for the docker tag MLRUN_DOCKER_TAG ?= $(shell echo "$(MLRUN_VERSION)" | sed -E 's/\+/\-/g') -# if the provided version is a semver and followed by a "-" we replace its first occurrence with "+" to align with PEP 404 -ifneq ($(shell echo "$(MLRUN_VERSION)" | grep -E "^[0-9]+\.[0-9]+\.[0-9]+-" | grep -vE 
"^[0-9]+\.[0-9]+\.[0-9]+-(a|b|rc)[0-9]+$$"),) - MLRUN_PYTHON_PACKAGE_VERSION ?= $(shell echo "$(MLRUN_VERSION)" | sed "s/\-/\+/") -endif -ifeq ($(shell echo "$(MLRUN_VERSION)" | grep -E "^[0-9]+\.[0-9]+\.[0-9]+.*$$"),) # empty result from egrep - MLRUN_PYTHON_PACKAGE_VERSION ?= 0.0.0+$(MLRUN_VERSION) -endif -MLRUN_PYTHON_PACKAGE_VERSION ?= $(MLRUN_VERSION) MLRUN_DOCKER_REPO ?= mlrun # empty by default (dockerhub), can be set to something like "quay.io/". # This will be used to tag the images built using this makefile @@ -59,6 +51,8 @@ MLRUN_SYSTEM_TESTS_CLEAN_RESOURCES ?= true MLRUN_CUDA_VERSION ?= 11.7.0 MLRUN_TENSORFLOW_VERSION ?= 2.9.0 MLRUN_HOROVOD_VERSION ?= 0.25.0 +# overrides the ml-models base image (models core) since it is broken and ml-models is about to be deprecated anyway +MLRUN_MODELS_BASE_IMAGE_OVERRIDE ?= quay.io/mlrun/ml-models:1.4.0-rc15 # THIS BLOCK IS FOR COMPUTED VARIABLES MLRUN_DOCKER_IMAGE_PREFIX := $(if $(MLRUN_DOCKER_REGISTRY),$(strip $(MLRUN_DOCKER_REGISTRY))$(MLRUN_DOCKER_REPO),$(MLRUN_DOCKER_REPO)) @@ -169,7 +163,7 @@ endif .PHONY: update-version-file update-version-file: ## Update the version file - python ./automation/version/version_file.py --mlrun-version $(MLRUN_PYTHON_PACKAGE_VERSION) + python ./automation/version/version_file.py ensure --mlrun-version $(MLRUN_VERSION) .PHONY: build build: docker-images package-wheel ## Build all artifacts @@ -272,7 +266,7 @@ pull-base: ## Pull base docker image MLRUN_MODELS_IMAGE_NAME := $(MLRUN_DOCKER_IMAGE_PREFIX)/$(MLRUN_ML_DOCKER_IMAGE_NAME_PREFIX)models MLRUN_MODELS_CACHE_IMAGE_NAME := $(MLRUN_CACHE_DOCKER_IMAGE_PREFIX)/$(MLRUN_ML_DOCKER_IMAGE_NAME_PREFIX)models MLRUN_MODELS_IMAGE_NAME_TAGGED := $(MLRUN_MODELS_IMAGE_NAME):$(MLRUN_DOCKER_TAG)$(MLRUN_PYTHON_VERSION_SUFFIX) -MLRUN_CORE_MODELS_IMAGE_NAME_TAGGED := $(MLRUN_MODELS_IMAGE_NAME_TAGGED)$(MLRUN_CORE_DOCKER_TAG_SUFFIX) +MLRUN_CORE_MODELS_IMAGE_NAME_TAGGED := $(if $(MLRUN_MODELS_BASE_IMAGE_OVERRIDE),$(strip 
$(MLRUN_MODELS_BASE_IMAGE_OVERRIDE))$(MLRUN_PYTHON_VERSION_SUFFIX),$(MLRUN_MODELS_IMAGE_NAME_TAGGED)$(MLRUN_CORE_DOCKER_TAG_SUFFIX)) MLRUN_MODELS_CACHE_IMAGE_NAME_TAGGED := $(MLRUN_MODELS_CACHE_IMAGE_NAME):$(MLRUN_DOCKER_CACHE_FROM_TAG)$(MLRUN_PYTHON_VERSION_SUFFIX) MLRUN_MODELS_IMAGE_DOCKER_CACHE_FROM_FLAG := $(if $(and $(MLRUN_DOCKER_CACHE_FROM_TAG),$(MLRUN_USE_CACHE)),--cache-from $(strip $(MLRUN_MODELS_CACHE_IMAGE_NAME_TAGGED)),) MLRUN_MODELS_CACHE_IMAGE_PUSH_COMMAND := $(if $(and $(MLRUN_DOCKER_CACHE_FROM_TAG),$(MLRUN_PUSH_DOCKER_CACHE_IMAGE)),docker tag $(MLRUN_MODELS_IMAGE_NAME_TAGGED) $(MLRUN_MODELS_CACHE_IMAGE_NAME_TAGGED) && docker push $(MLRUN_MODELS_CACHE_IMAGE_NAME_TAGGED),) @@ -294,7 +288,7 @@ models-core: base-core ## Build models core docker image --tag $(MLRUN_CORE_MODELS_IMAGE_NAME_TAGGED) . .PHONY: models -models: models-core ## Build models docker image +models: #models-core ## Build models docker image (uncomment to build models-core base image if not overriden) docker build \ --file dockerfiles/common/Dockerfile \ --build-arg MLRUN_BASE_IMAGE=$(MLRUN_CORE_MODELS_IMAGE_NAME_TAGGED) \ @@ -533,7 +527,7 @@ test: clean ## Run mlrun tests --ignore=tests/system \ --ignore=tests/rundb/test_httpdb.py \ -rf \ - tests + tests/frameworks/test_ml_frameworks.py .PHONY: test-integration-dockerized @@ -683,12 +677,12 @@ fmt: ## Format the code (using black and isort) python -m isort . 
.PHONY: lint-imports -lint-imports: ## making sure imports dependencies are aligned +lint-imports: ## Validates import dependencies @echo "Running import linter" lint-imports .PHONY: lint -lint: flake8 fmt-check ## Run lint on the code +lint: flake8 fmt-check lint-imports ## Run lint on the code .PHONY: fmt-check fmt-check: ## Format and check the code (using black) diff --git a/README.md b/README.md index aa423c4b7c..fe8ce342e6 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) ![GitHub commit activity](https://img.shields.io/github/commit-activity/w/mlrun/mlrun) ![GitHub release (latest SemVer)](https://img.shields.io/github/v/release/mlrun/mlrun?sort=semver) -[![Join MLOps Live](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](mlopslive.slack.com) +[![Join MLOps Live](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://mlopslive.slack.com)

MLRun logo

diff --git a/automation/requirements.txt b/automation/requirements.txt index 5097388006..9f85d431ac 100644 --- a/automation/requirements.txt +++ b/automation/requirements.txt @@ -3,3 +3,5 @@ paramiko~=2.12 semver~=2.13 requests~=2.22 boto3~=1.24.59 +pyyaml~=5.1 +packaging~=23.1 diff --git a/automation/system_test/dev_utilities.py b/automation/system_test/dev_utilities.py index a760c35f1b..de886e581e 100644 --- a/automation/system_test/dev_utilities.py +++ b/automation/system_test/dev_utilities.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # + import base64 +import json import subprocess import click @@ -285,7 +287,7 @@ def uninstall(redis, kafka, mysql, redisinsight): print(e) try: print("namespace deleteted") - delns = "kubectl delete namespace devtools" + cmd = "kubectl delete namespace devtools" subprocess.run(cmd.split(), check=True) except Exception as e: # !!! print(e) @@ -307,30 +309,47 @@ def list_services_h(): @click.command() -@click.option("--redis", is_flag=True, help="Install Redis") -@click.option("--kafka", is_flag=True, help="Install Kafka") -@click.option("--mysql", is_flag=True, help="Install MySQL") -@click.option("--redisinsight", is_flag=True, help="Install Redis GUI") -def status(redis, kafka, mysql, redisinsight): +@click.option("--redis", is_flag=True, help="Get Redis info") +@click.option("--kafka", is_flag=True, help="Get Kafka info") +@click.option("--mysql", is_flag=True, help="Get MySQL info") +@click.option("--redisinsight", is_flag=True, help="Get Redis GUI info") +@click.option("--output", default="human", type=click.Choice(["human", "json"])) +def status(redis, kafka, mysql, redisinsight, output): namespace = "devtools" + get_all_output = {} if redis: svc_password = get_svc_password(namespace, "redis", "redis-password") - print_svc_info( - "redis-master-0.redis-headless.devtools.svc.cluster.local", - 6379, - "default", - svc_password, - "-------", - ) + 
get_all_output["redis"] = status_h("redis") + if output == "human": + print_svc_info( + "redis-master-0.redis-headless.devtools.svc.cluster.local", + 6379, + "default", + svc_password, + "-------", + ) if kafka: - print_svc_info("kafka", 9092, "-------", "-------", "-------") + get_all_output["kafka"] = status_h("kafka") + if output == "human": + print_svc_info("kafka", 9092, "-------", "-------", "-------") if mysql: svc_password = get_svc_password(namespace, "mysql", "mysql-root-password") - print_svc_info("mysql", 3306, "root", svc_password, "-------") + get_all_output["mysql"] = status_h("mysql") + if output == "human": + print_svc_info("mysql", 3306, "root", svc_password, "-------") if redisinsight: - fqdn = get_ingress_controller_version() - full_domain = "https://redisinsight" + fqdn - print_svc_info("", " " + full_domain, "-------", "-------", "-------") + get_all_output["redisinsight"] = status_h("redisinsight") + if output == "human": + print_svc_info( + "", + " " + get_all_output["redisinsight"]["app_url"], + "-------", + "-------", + "-------", + ) + + if output == "json": + print(json.dumps(get_all_output)) def status_h(svc): diff --git a/automation/system_test/prepare.py b/automation/system_test/prepare.py index 15efd58be1..e81e4e0ec8 100644 --- a/automation/system_test/prepare.py +++ b/automation/system_test/prepare.py @@ -14,6 +14,7 @@ # import datetime +import json import logging import os import pathlib @@ -30,11 +31,27 @@ import paramiko import yaml -# TODO: remove and use local logger -import mlrun.utils + +class Logger: + def __init__(self, name, **kwargs): + self._logger = logging.getLogger(name) + level = kwargs.get("level", logging.INFO) + self._logger.setLevel(level) + if not self._logger.handlers: + ch = logging.StreamHandler() + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) + ch.setFormatter(formatter) + self._logger.addHandler(ch) + + def log(self, level: str, message: str, **kwargs: 
typing.Any) -> None: + more = f": {kwargs}" if kwargs else "" + self._logger.log(logging.getLevelName(level.upper()), f"{message}{more}") + project_dir = pathlib.Path(__file__).resolve().parent.parent.parent -logger = mlrun.utils.create_logger(level="debug", name="automation") +logger = Logger(level=logging.DEBUG, name="automation") logging.getLogger("paramiko").setLevel(logging.DEBUG) @@ -69,12 +86,9 @@ def __init__( provctl_download_s3_access_key: str = None, provctl_download_s3_key_id: str = None, mlrun_dbpath: str = None, - webapi_direct_http: str = None, - framesd_url: str = None, username: str = None, access_key: str = None, iguazio_version: str = None, - spark_service: str = None, slack_webhook_url: str = None, mysql_user: str = None, mysql_password: str = None, @@ -97,7 +111,6 @@ def __init__( self._data_cluster_ssh_username = data_cluster_ssh_username self._data_cluster_ssh_password = data_cluster_ssh_password self._app_cluster_ssh_password = app_cluster_ssh_password - self._github_access_token = github_access_token self._provctl_download_url = provctl_download_url self._provctl_download_s3_access_key = provctl_download_s3_access_key self._provctl_download_s3_key_id = provctl_download_s3_key_id @@ -105,14 +118,12 @@ def __init__( self._mysql_user = mysql_user self._mysql_password = mysql_password self._purge_db = purge_db + self._ssh_client = None self._env_config = { "MLRUN_DBPATH": mlrun_dbpath, - "V3IO_API": webapi_direct_http, - "V3IO_FRAMESD": framesd_url, "V3IO_USERNAME": username, "V3IO_ACCESS_KEY": access_key, - "MLRUN_SYSTEM_TESTS_DEFAULT_SPARK_SERVICE": spark_service, "MLRUN_SYSTEM_TESTS_SLACK_WEBHOOK_URL": slack_webhook_url, "MLRUN_SYSTEM_TESTS_BRANCH": branch, # Setting to MLRUN_SYSTEM_TESTS_GIT_TOKEN instead of GIT_TOKEN, to not affect tests which doesn't need it @@ -120,14 +131,16 @@ def __init__( "MLRUN_SYSTEM_TESTS_GIT_TOKEN": github_access_token, } - def prepare_local_env(self): - self._prepare_env_local() + def prepare_local_env(self, 
save_to_path: str = ""): + self._prepare_env_local(save_to_path) def connect_to_remote(self): - self._logger.info( - "Connecting to data-cluster", data_cluster_ip=self._data_cluster_ip - ) - if not self._debug: + if not self._debug and self._data_cluster_ip: + self._logger.log( + "info", + "Connecting to data-cluster", + data_cluster_ip=self._data_cluster_ip, + ) self._ssh_client = paramiko.SSHClient() self._ssh_client.set_missing_host_key_policy(paramiko.WarningPolicy) self._ssh_client.connect( @@ -140,11 +153,13 @@ def run(self): self.connect_to_remote() try: - logger.debug("installing dev utilities") + logger.log("debug", "installing dev utilities") self._install_dev_utilities() - logger.debug("installing dev utilities - done") + logger.log("debug", "installing dev utilities - done") except Exception as exp: - self._logger.error("error on install dev utilities", exception=str(exp)) + self._logger.log( + "error", "error on install dev utilities", exception=str(exp) + ) # for sanity clean up before starting the run self.clean_up_remote_workdir() @@ -166,8 +181,8 @@ def run(self): self._patch_mlrun() def clean_up_remote_workdir(self): - self._logger.info( - "Cleaning up remote workdir", workdir=str(self.Constants.workdir) + self._logger.log( + "info", "Cleaning up remote workdir", workdir=str(self.Constants.workdir) ) self._run_command( f"rm -rf {self.Constants.workdir}", workdir=str(self.Constants.homedir) @@ -191,7 +206,8 @@ def _run_command( log_command_location = "locally" if local else "on data cluster" if verbose: - self._logger.debug( + self._logger.log( + "debug", f"Running command {log_command_location}", command=command, args=args, @@ -227,14 +243,16 @@ def _run_command( if verbose: err_log_kwargs["command"] = command - self._logger.error( + self._logger.log( + "error", f"Failed running command {log_command_location}", **err_log_kwargs, ) raise else: if verbose: - self._logger.debug( + self._logger.log( + "debug", f"Successfully ran command 
{log_command_location}", command=command, stdout=stdout, @@ -263,7 +281,9 @@ def _run_command_remotely( if detach: command = f"screen -d -m bash -c '{command}'" if verbose: - self._logger.debug("running command in detached mode", command=command) + self._logger.log( + "debug", "running command in detached mode", command=command + ) stdin_stream, stdout_stream, stderr_stream = self._ssh_client.exec_command( command @@ -296,18 +316,21 @@ def _prepare_env_remote(self): workdir=str(self.Constants.homedir), ) - def _prepare_env_local(self): - filepath = str(self.Constants.system_tests_env_yaml) + def _prepare_env_local(self, save_to_path: str = ""): + filepath = save_to_path or str(self.Constants.system_tests_env_yaml) backup_filepath = str(self.Constants.system_tests_env_yaml) + ".bak" - self._logger.debug("Populating system tests env.yml", filepath=filepath) + self._logger.log("debug", "Populating system tests env.yml", filepath=filepath) # if filepath exists, backup the file first (to avoid overriding it) if os.path.isfile(filepath) and not os.path.isfile(backup_filepath): - self._logger.debug( - "Backing up existing env.yml", destination=backup_filepath + self._logger.log( + "debug", "Backing up existing env.yml", destination=backup_filepath ) shutil.copy(filepath, backup_filepath) + # enrichment can be done only if ssh client is initialized + if self._ssh_client: + self._enrich_env() serialized_env_config = self._serialize_env_config() with open(filepath, "w") as f: f.write(serialized_env_config) @@ -349,6 +372,26 @@ def _override_mlrun_api_env(self): args=["apply", "-f", manifest_file_name], ) + def _enrich_env(self): + devutils_outputs = self._get_devutils_status() + if "redis" in devutils_outputs: + self._logger.log("debug", "Enriching env with redis info") + # uncomment when url is accessible from outside the cluster + # self._env_config["MLRUN_REDIS__URL"] = f"redis://{devutils_outputs['redis']['app_url']}" + # self._env_config["REDIS_USER"] = 
devutils_outputs["redis"]["username"] + # self._env_config["REDIS_PASSWORD"] = devutils_outputs["redis"]["password"] + + api_url_host = self._get_ingress_host("datanode-dashboard") + framesd_host = self._get_ingress_host("framesd") + v3io_api_host = self._get_ingress_host("webapi") + spark_service_name = self._get_service_name("app=spark,component=spark-master") + self._env_config["MLRUN_IGUAZIO_API_URL"] = f"https://{api_url_host}" + self._env_config["V3IO_FRAMESD"] = f"https://{framesd_host}" + self._env_config[ + "MLRUN_SYSTEM_TESTS_DEFAULT_SPARK_SERVICE" + ] = spark_service_name + self._env_config["V3IO_API"] = f"https://{v3io_api_host}" + def _install_dev_utilities(self): list_uninstall = [ "dev_utilities.py", @@ -385,7 +428,8 @@ def _download_provctl(self): bucket_name = parsed_url.netloc.split(".")[0] # download provctl from s3 with tempfile.NamedTemporaryFile() as local_provctl_path: - self._logger.debug( + self._logger.log( + "debug", "Downloading provctl", bucket_name=bucket_name, object_name=object_name, @@ -398,7 +442,8 @@ def _download_provctl(self): ) s3_client.download_file(bucket_name, object_name, local_provctl_path.name) # upload provctl to data node - self._logger.debug( + self._logger.log( + "debug", "Uploading provctl to datanode", remote_path=str(self.Constants.provctl_path), local_path=local_provctl_path.name, @@ -425,7 +470,8 @@ def _run_and_wait_until_successful( finished = True except Exception: - self._logger.debug( + self._logger.log( + "debug", f"Command {command_name} didn't complete yet, trying again in {interval} seconds", retry_number=retries, ) @@ -433,19 +479,21 @@ def _run_and_wait_until_successful( time.sleep(interval) if retries >= max_retries and not finished: - self._logger.info( - f"Command {command_name} timeout passed and not finished, failing..." 
+ self._logger.log( + "info", + f"Command {command_name} timeout passed and not finished, failing...", ) - raise mlrun.errors.MLRunTimeoutError() + raise RuntimeError("Command timeout passed and not finished") total_seconds_took = (datetime.datetime.now() - start_time).total_seconds() - self._logger.info( - f"Command {command_name} took {total_seconds_took} seconds to finish" + self._logger.log( + "info", + f"Command {command_name} took {total_seconds_took} seconds to finish", ) def _patch_mlrun(self): time_string = time.strftime("%Y%m%d-%H%M%S") - self._logger.debug( - "Creating mlrun patch archive", mlrun_version=self._mlrun_version + self._logger.log( + "debug", "Creating mlrun patch archive", mlrun_version=self._mlrun_version ) mlrun_archive = f"./mlrun-{self._mlrun_version}.tar" @@ -480,7 +528,9 @@ def _patch_mlrun(self): # print provctl create patch log self._run_command(f"cat {provctl_create_patch_log}") - self._logger.info("Patching MLRun version", mlrun_version=self._mlrun_version) + self._logger.log( + "info", "Patching MLRun version", mlrun_version=self._mlrun_version + ) provctl_patch_mlrun_log = f"/tmp/provctl-patch-mlrun-{time_string}.log" self._run_command( str(self.Constants.provctl_path), @@ -498,6 +548,11 @@ def _patch_mlrun(self): "--force", "mlrun", mlrun_archive, + # enable audit events - will be ignored by provctl if mlrun version does not support it + # TODO: remove when setup is upgraded to iguazio version >= 3.5.4 since audit events + # are enabled by default + "--feature-gates", + "mlrun.auditevents=enabled", ], detach=True, ) @@ -513,15 +568,15 @@ def _patch_mlrun(self): def _resolve_iguazio_version(self): # iguazio version is optional, if not provided, we will try to resolve it from the data node if not self._iguazio_version: - self._logger.info("Resolving iguazio version") + self._logger.log("info", "Resolving iguazio version") self._iguazio_version, _ = self._run_command( f"cat {self.Constants.igz_version_file}", verbose=False, 
live=False, ) self._iguazio_version = self._iguazio_version.strip().decode() - self._logger.info( - "Resolved iguazio version", iguazio_version=self._iguazio_version + self._logger.log( + "info", "Resolved iguazio version", iguazio_version=self._iguazio_version ) def _purge_mlrun_db(self): @@ -532,7 +587,7 @@ def _purge_mlrun_db(self): self._scale_down_mlrun_deployments() def _delete_mlrun_db(self): - self._logger.info("Deleting mlrun db") + self._logger.log("info", "Deleting mlrun db") mlrun_db_pod_name_cmd = self._get_pod_name_command( labels={ @@ -541,11 +596,11 @@ def _delete_mlrun_db(self): }, ) if not mlrun_db_pod_name_cmd: - self._logger.info("No mlrun db pod found") + self._logger.log("info", "No mlrun db pod found") return - self._logger.info( - "Deleting mlrun db pod", mlrun_db_pod_name_cmd=mlrun_db_pod_name_cmd + self._logger.log( + "info", "Deleting mlrun db pod", mlrun_db_pod_name_cmd=mlrun_db_pod_name_cmd ) password = "" @@ -591,7 +646,7 @@ def _get_pod_name_command(self, labels): def _scale_down_mlrun_deployments(self): # scaling down to avoid automatically deployments restarts and failures - self._logger.info("scaling down mlrun deployments") + self._logger.log("info", "scaling down mlrun deployments") self._run_kubectl_command( args=[ "scale", @@ -623,6 +678,64 @@ def _serialize_env_config(self, allow_none_values: bool = False): return yaml.safe_dump(env_config) + def _get_ingress_host(self, ingress_name: str): + host, stderr = self._run_kubectl_command( + args=[ + "get", + "ingress", + "--namespace", + self.Constants.namespace, + ingress_name, + "--output", + "jsonpath={'@.spec.rules[0].host'}", + ], + ) + if stderr: + raise RuntimeError( + f"Failed getting {ingress_name} ingress host. 
Error: {stderr}" + ) + return host.strip() + + def _get_service_name(self, label_selector): + service_name, stderr = self._run_kubectl_command( + args=[ + "get", + "deployment", + "--namespace", + self.Constants.namespace, + "-l", + label_selector, + "--output", + "jsonpath={'@.items[0].metadata.labels.release'}", + ], + ) + if stderr: + raise RuntimeError(f"Failed getting service name. Error: {stderr}") + return service_name.strip() + + def _get_devutils_status(self): + out, err = "", "" + try: + out, err = self._run_command( + "python3", + [ + "/home/iguazio/dev_utilities.py", + "status", + "--redis", + "--kafka", + "--mysql", + "--redisinsight", + "--output", + "json", + ], + ) + except Exception as exc: + self._logger.log( + "warning", "Failed to enrich env", exc=exc, err=err, out=out + ) + + return json.loads(out or "{}") + @click.group() def main(): @@ -659,18 +772,12 @@ def main(): @click.option("--data-cluster-ssh-username", required=True) @click.option("--data-cluster-ssh-password", required=True) @click.option("--app-cluster-ssh-password", required=True) -@click.option("--github-access-token", required=True) @click.option("--provctl-download-url", required=True) @click.option("--provctl-download-s3-access-key", required=True) @click.option("--provctl-download-s3-key-id", required=True) -@click.option("--mlrun-dbpath", required=True) -@click.option("--webapi-direct-url", required=True) -@click.option("--framesd-url", required=True) @click.option("--username", required=True) @click.option("--access-key", required=True) @click.option("--iguazio-version", default=None) -@click.option("--spark-service", required=True) -@click.option("--slack-webhook-url") @click.option("--mysql-user") @click.option("--mysql-password") @click.option("--purge-db", "-pdb", is_flag=True, help="Purge mlrun db") @@ -690,64 +797,52 @@ def run( data_cluster_ssh_username: str, data_cluster_ssh_password: str, app_cluster_ssh_password: str, - github_access_token: str, 
provctl_download_url: str, provctl_download_s3_access_key: str, provctl_download_s3_key_id: str, - mlrun_dbpath: str, - webapi_direct_url: str, - framesd_url: str, username: str, access_key: str, iguazio_version: str, - spark_service: str, - slack_webhook_url: str, mysql_user: str, mysql_password: str, purge_db: bool, debug: bool, ): system_test_preparer = SystemTestPreparer( - mlrun_version, - mlrun_commit, - override_image_registry, - override_image_repo, - override_mlrun_images, - data_cluster_ip, - data_cluster_ssh_username, - data_cluster_ssh_password, - app_cluster_ssh_password, - github_access_token, - provctl_download_url, - provctl_download_s3_access_key, - provctl_download_s3_key_id, - mlrun_dbpath, - webapi_direct_url, - framesd_url, - username, - access_key, - iguazio_version, - spark_service, - slack_webhook_url, - mysql_user, - mysql_password, - purge_db, - debug, + mlrun_version=mlrun_version, + mlrun_commit=mlrun_commit, + override_image_registry=override_image_registry, + override_image_repo=override_image_repo, + override_mlrun_images=override_mlrun_images, + data_cluster_ip=data_cluster_ip, + data_cluster_ssh_username=data_cluster_ssh_username, + data_cluster_ssh_password=data_cluster_ssh_password, + app_cluster_ssh_password=app_cluster_ssh_password, + provctl_download_url=provctl_download_url, + provctl_download_s3_access_key=provctl_download_s3_access_key, + provctl_download_s3_key_id=provctl_download_s3_key_id, + username=username, + access_key=access_key, + iguazio_version=iguazio_version, + mysql_user=mysql_user, + mysql_password=mysql_password, + purge_db=purge_db, + debug=debug, ) try: system_test_preparer.run() except Exception as exc: - logger.error("Failed running system test automation", exc=exc) + logger.log("error", "Failed running system test automation", exc=exc) raise @main.command(context_settings=dict(ignore_unknown_options=True)) @click.option("--mlrun-dbpath", help="The mlrun api address", required=True) 
-@click.option("--webapi-direct-url", help="Iguazio webapi direct url") -@click.option("--framesd-url", help="Iguazio framesd url") +@click.option("--data-cluster-ip") +@click.option("--data-cluster-ssh-username") +@click.option("--data-cluster-ssh-password") @click.option("--username", help="Iguazio running username") @click.option("--access-key", help="Iguazio running user access key") -@click.option("--spark-service", help="Iguazio kubernetes spark service name") @click.option( "--slack-webhook-url", help="Slack webhook url to send tests notifications to" ) @@ -762,34 +857,40 @@ def run( "--github-access-token", help="Github access token to use for fetching private functions", ) +@click.option( + "--save-to-path", + help="Path to save the compiled env file to", +) def env( + data_cluster_ip: str, + data_cluster_ssh_username: str, + data_cluster_ssh_password: str, mlrun_dbpath: str, - webapi_direct_url: str, - framesd_url: str, username: str, access_key: str, - spark_service: str, slack_webhook_url: str, debug: bool, branch: str, github_access_token: str, + save_to_path: str, ): system_test_preparer = SystemTestPreparer( + data_cluster_ip=data_cluster_ip, + data_cluster_ssh_password=data_cluster_ssh_password, + data_cluster_ssh_username=data_cluster_ssh_username, mlrun_dbpath=mlrun_dbpath, - webapi_direct_http=webapi_direct_url, - framesd_url=framesd_url, username=username, access_key=access_key, - spark_service=spark_service, debug=debug, slack_webhook_url=slack_webhook_url, branch=branch, github_access_token=github_access_token, ) try: - system_test_preparer.prepare_local_env() + system_test_preparer.connect_to_remote() + system_test_preparer.prepare_local_env(save_to_path) except Exception as exc: - logger.error("Failed preparing local system test environment", exc=exc) + logger.log("error", "Failed preparing local system test environment", exc=exc) raise diff --git a/automation/version/unstable_version_prefix b/automation/version/unstable_version_prefix index 
88c5fb891d..bc80560fad 100644 --- a/automation/version/unstable_version_prefix +++ b/automation/version/unstable_version_prefix @@ -1 +1 @@ -1.4.0 +1.5.0 diff --git a/automation/version/version_file.py b/automation/version/version_file.py index e041b0c401..2eb6bdf720 100644 --- a/automation/version/version_file.py +++ b/automation/version/version_file.py @@ -12,12 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. # + import argparse import json import logging import os.path +import pathlib +import re import subprocess import sys +import typing + +import packaging.version # NOTE # this script is being used in all build flows before building to add version information to the code @@ -30,41 +36,316 @@ def main(): parser = argparse.ArgumentParser(description="Create or update the version file") - parser.add_argument( + subparsers = parser.add_subparsers(dest="command") + ensure_parser = subparsers.add_parser( + "ensure", help="ensure the version file is up to date" + ) + ensure_parser.add_argument( "--mlrun-version", type=str, required=False, default="0.0.0+unstable" ) + is_stable_parser = subparsers.add_parser( + "is-stable", help="check if the version is stable" + ) + is_stable_parser.add_argument("version", type=str) + + subparsers.add_parser("current-version", help="get the current version") + next_version_parser = subparsers.add_parser("next-version", help="get next version") + + # RC - bump the rc version. if current is not rc, bump patch and set rc to 1 + # RC-GRAD - bump the rc version to its graduated version (1.0.0-rc1 -> 1.0.0) + # PATCH - bump the patch version. reset rc + # MINOR - bump the minor version. reset rc / patch + # MAJOR - bump the major version. 
reset rc / patch / minor + next_version_parser.add_argument( + "--mode", + choices=["rc", "rc-grad", "patch", "minor", "major"], + default="rc", + help="bump the version by the given mode", + ) + args = parser.parse_args() + if args.command == "current-version": + current_version = get_current_version(read_unstable_version_prefix()) + print(current_version) + + elif args.command == "next-version": + base_version = read_unstable_version_prefix() + current_version = get_current_version(base_version) + next_version = resolve_next_version( + args.mode, + packaging.version.Version(current_version), + base_version, + get_feature_branch_feature_name(), + ) + print(next_version) + + elif args.command == "ensure": + repo_root = os.path.dirname( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + ) + version_file_path = os.path.join( + repo_root, "mlrun", "utils", "version", "version.json" + ) + create_or_update_version_file(args.mlrun_version, version_file_path) + + elif args.command == "is-stable": + is_stable = is_stable_version(args.version) + print(str(is_stable).lower()) + + +def get_current_version( + base_version: packaging.version.Version, +) -> str: + current_branch = _run_command( + "git", args=["rev-parse", "--abbrev-ref", "HEAD"] + ).strip() + feature_name = ( + resolve_feature_name(current_branch) + if current_branch.startswith("feature/") + else "" + ) + + # get last 200 commits, to avoid going over all commits + commits = _run_command("git", args=["log", "-200", "--pretty=format:'%H'"]).strip() + found_tag = None + + # most_recent_version is the most recent tag before base version + most_recent_version = None + for commit in commits.split("\n"): + # is commit tagged? 
+ tags = _run_command("git", args=["tag", "--points-at", commit]).strip() + tags = [tag for tag in tags.split("\n") if tag] + if not tags: + continue + + for tag in tags: + + # work with semvar-like tags only + if not re.match(r"^v[0-9]+\.[0-9]+\.[0-9]+.*$", tag): + continue + + semver_tag = packaging.version.parse(tag.removeprefix("v")) + + # compare base versions on both base and current tag + # if current tag version (e.g.: 1.4.0) is smaller than base version (e.g.: 1.5.0) + # then, keep that tag version as the most recent version + # if no base-version tag was made (e.g.: when starting new version (e.g. 1.5.0) + # but no tag/release was made yet) + if packaging.version.parse( + semver_tag.base_version + ) < packaging.version.parse(base_version.base_version): + if most_recent_version: + if semver_tag > most_recent_version: + most_recent_version = semver_tag + continue + most_recent_version = semver_tag + continue + + # is feature branch? + if feature_name and semver_tag.local and feature_name in semver_tag.local: + if found_tag and semver_tag < found_tag: + continue + found_tag = semver_tag + continue + + # we found the feature branch tag, continue because + # there is no point finding other tags unrelated to feature branch now + if ( + found_tag + and found_tag.local + and feature_name + and feature_name in found_tag.local + ): + continue + + # we might not have found tag or what we found is old one? + is_rc = semver_tag.pre and semver_tag.pre[0] == "rc" + if is_rc: + if found_tag and semver_tag < found_tag: + continue + found_tag = semver_tag + continue + + # tag is not rc, not feature branch, and not older than current tag. 
use it + found_tag = semver_tag + + # stop here because + # we either have a tag + # or, moving back in time wont find newer tags on same branch timeline + break + + # nothing to bump, just return the version + if not found_tag: + if most_recent_version: + return version_to_mlrun_version(most_recent_version) + + return version_to_mlrun_version(base_version) + + return version_to_mlrun_version(found_tag) - create_or_update_version_file(args.mlrun_version) +def resolve_next_version( + mode: str, + current_version: packaging.version.Version, + base_version: packaging.version.Version, + feature_name: typing.Optional[str] = None, +): + if ( + base_version.major > current_version.major + or base_version.minor > current_version.minor + ): + # the current version is lower, can be because base version was not tagged yet + # make current version align with base version + suffix = "" + if mode == "rc": -def create_or_update_version_file(mlrun_version): + # index 0 because we increment rc later on + suffix += "-rc0" + current_version = packaging.version.Version(base_version.base_version + suffix) + + rc = None + if current_version.pre and current_version.pre[0] == "rc": + rc = int(current_version.pre[1]) + major, minor, patch = ( + current_version.major, + current_version.minor, + current_version.micro, + ) + if mode == "rc": + + # if current version is not RC, update its patch version + if rc is None: + patch = patch + 1 + rc = 1 + else: + rc += 1 + elif mode == "rc-grad": + rc = None + elif mode == "patch": + patch = patch + 1 + rc = None + elif mode == "minor": + minor = minor + 1 + patch = 0 + rc = None + elif mode == "major": + major = major + 1 + minor = 0 + patch = 0 + rc = None + + # when feature name is set, it means we are on a feature branch + # thus, we ensure rc is set as feature name are not meant to be "stable" + if feature_name and not rc: + rc = 1 + + new_version = f"{major}.{minor}.{patch}" + if rc is not None: + new_version = f"{new_version}-rc{rc}" + + if 
feature_name: + new_version = f"{new_version}+{feature_name}" + return new_version + + +def create_or_update_version_file(mlrun_version: str, version_file_path: str): git_commit = "unknown" try: - out = _run_command("git", args=["rev-parse", "HEAD"]) - git_commit = out.strip() + git_commit = _run_command("git", args=["rev-parse", "HEAD"]).strip() logger.debug("Found git commit: {}".format(git_commit)) except Exception as exc: logger.warning("Failed to get version", exc_info=exc) + # get feature branch name from git branch + git_branch = "" + try: + git_branch = _run_command( + "git", args=["rev-parse", "--abbrev-ref", "HEAD"] + ).strip() + logger.debug("Found git branch: {}".format(git_branch)) + except Exception as exc: + logger.warning("Failed to get git branch", exc_info=exc) + + # Enrich the version with the feature name (unless version is unstable) + if ( + "+unstable" not in mlrun_version + and git_branch + and git_branch.startswith("feature/") + ): + feature_name = resolve_feature_name(git_branch) + if not mlrun_version.endswith(feature_name): + mlrun_version = f"{mlrun_version}+{feature_name}" + + # Check if the provided version is a semver and followed by a "-" + semver_pattern = r"^[0-9]+\.[0-9]+\.[0-9]+" # e.g. 
0.6.0- + rc_semver_pattern = rf"{semver_pattern}-(a|b|rc)[0-9]+$" + + # In case of semver - do nothing + if re.match(semver_pattern, mlrun_version): + pass + + # In case of rc semver - replace the first occurrence of "-" with "+" to align with PEP 440 + # https://peps.python.org/pep-0440/ + elif re.match(rc_semver_pattern, mlrun_version): + mlrun_version = mlrun_version.replace("-", "+", 1) + + # In case of some free text - check if the provided version matches the semver pattern + elif not re.match(r"^[0-9]+\.[0-9]+\.[0-9]+.*$", mlrun_version): + mlrun_version = "0.0.0+" + mlrun_version + version_info = { "version": mlrun_version, "git_commit": git_commit, } - repo_root = os.path.dirname( - os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - ) - version_file_path = os.path.join( - repo_root, "mlrun", "utils", "version", "version.json" - ) logger.info("Writing version info to file: {}".format(str(version_info))) with open(version_file_path, "w+") as version_file: json.dump(version_info, version_file, sort_keys=True, indent=2) +def resolve_feature_name(branch_name): + feature_name = branch_name.replace("feature/", "") + feature_name = feature_name.lower() + + # replace non-alphanumeric characters with "-" to align with PEP 440 and docker tag naming + feature_name = re.sub(r"\+\./\\", "-", feature_name) + return feature_name + + +def read_unstable_version_prefix(): + with open( + pathlib.Path(__file__).absolute().parent / "unstable_version_prefix" + ) as fp: + return packaging.version.Version(fp.read().strip()) + + +def version_to_mlrun_version(version): + version_str = f"{version.major}.{version.minor}.{version.micro}" + if version.pre and version.pre[0] == "rc": + version_str += f"-rc{version.pre[1]}" + + if version.local: + version_str += f"+{version.local}" + return version_str + + +def get_feature_branch_feature_name() -> typing.Optional[str]: + current_branch = _run_command( + "git", args=["rev-parse", "--abbrev-ref", "HEAD"] + ).strip() + return ( + 
resolve_feature_name(current_branch) + if current_branch.startswith("feature/") + else "" + ) + + +def is_stable_version(mlrun_version: str) -> bool: + return re.match(r"^\d+\.\d+\.\d+$", mlrun_version) is not None + + def _run_command(command, args=None): if args: command += " " + " ".join(args) diff --git a/dev-requirements.txt b/dev-requirements.txt index 5f60e63bc9..2f6e876df9 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,5 +1,6 @@ pytest~=7.0 twine~=3.1 +databricks-sdk~=0.1.8 black[jupyter]~=22.0 flake8~=5.0 pytest-asyncio~=0.15.0 diff --git a/dockerfiles/common/Dockerfile b/dockerfiles/common/Dockerfile index 406d7d893e..7e1fdcc486 100644 --- a/dockerfiles/common/Dockerfile +++ b/dockerfiles/common/Dockerfile @@ -17,4 +17,4 @@ ARG MLRUN_BASE_IMAGE=mlrun/mlrun:unstable-core FROM ${MLRUN_BASE_IMAGE} COPY . . -RUN python -m pip install .[complete] +RUN python -m pip install -U .[complete] diff --git a/dockerfiles/jupyter/requirements.txt b/dockerfiles/jupyter/requirements.txt index 5747a632c6..b4c51f6b12 100644 --- a/dockerfiles/jupyter/requirements.txt +++ b/dockerfiles/jupyter/requirements.txt @@ -6,8 +6,9 @@ scikit-plot~=0.3.7 xgboost~=1.1 graphviz~=0.20.0 python-dotenv~=0.17.0 -nuclio-jupyter[jupyter-server]~=0.9.10 +nuclio-jupyter[jupyter-server]~=0.9.11 nbclassic>=0.2.8 # added to tackle security vulnerabilities notebook~=6.4 Pillow~=9.0 +jupyterlab-git~=0.41.0 diff --git a/dockerfiles/mlrun-api/requirements.txt b/dockerfiles/mlrun-api/requirements.txt index 3901bb83e1..63ad7deba4 100644 --- a/dockerfiles/mlrun-api/requirements.txt +++ b/dockerfiles/mlrun-api/requirements.txt @@ -3,4 +3,4 @@ dask-kubernetes~=0.11.0 apscheduler~=3.6 sqlite3-to-mysql~=1.4 objgraph~=3.5 -igz-mgmt~=0.0.8 +igz-mgmt~=0.0.10 diff --git a/dockerfiles/test/Dockerfile b/dockerfiles/test/Dockerfile index 3649b3bbd3..3fb85d9c46 100644 --- a/dockerfiles/test/Dockerfile +++ b/dockerfiles/test/Dockerfile @@ -32,6 +32,7 @@ RUN apt-get update && apt-get install 
--no-install-recommends -y \ apt-transport-https \ ca-certificates \ g++ \ + git \ git-core \ gnupg2 \ graphviz \ @@ -39,6 +40,10 @@ RUN apt-get update && apt-get install --no-install-recommends -y \ software-properties-common \ && rm -rf /var/lib/apt/lists/* +# set initial git config +RUN git config --global user.email "test@mlrun.org" \ + && git config --global user.name "MLRun Test" \ + && git config --global init.defaultBranch "main" ARG MLRUN_PIP_VERSION=22.3.0 RUN python -m pip install --upgrade pip~=${MLRUN_PIP_VERSION} diff --git a/docs/_static/images/model-monitoring-datasource.png b/docs/_static/images/model-monitoring-datasource.png new file mode 100644 index 0000000000..d3fbf45274 Binary files /dev/null and b/docs/_static/images/model-monitoring-datasource.png differ diff --git a/docs/api/mlrun.serving.rst b/docs/api/mlrun.serving.rst index 6efd3dcdb1..0394df9af8 100644 --- a/docs/api/mlrun.serving.rst +++ b/docs/api/mlrun.serving.rst @@ -1,6 +1,10 @@ mlrun.serving ============== +.. autoclass:: mlrun.serving.states.BaseStep + :members: to, error_handler + :private-members: + .. automodule:: mlrun.serving :members: :show-inheritance: diff --git a/docs/change-log/index.md b/docs/change-log/index.md index 6d3103c0d8..387c2d1509 100644 --- a/docs/change-log/index.md +++ b/docs/change-log/index.md @@ -59,34 +59,12 @@ New sections describing [Git best practices](../projects/git-best-practices.html The MLRun server is now based on Python 3.9. It's recommended to move the client to Python 3.9 as well. MLRun v1.3.0 maintains support for mlrun base images that are based on python 3.7. To differentiate between the images, the images based on -python 3.7 have the suffix: `-py37`. The correct version is automatically chosen for the built-in MLRun images according to the Python version of the MLRun client (for example, a 3.7 Jupyter gets the `-py37` images). +python 3.7 have the suffix: `-py37`. 
The correct version is automatically chosen for the built-in MLRun images according to the Python version of the MLRun client. + +See instructions in [Set up your environment](../install/remote.html). MLRun is pre-installed in CE Jupyter. -To install on a **Python 3.9** environment, run:
-``` -./align_mlrun.sh -``` - -To install on a **Python 3.7** environment (and optionally upgrade to python 3.9), run: - -1. Configure the Jupyter service with the env variable `JUPYTER_PREFER_ENV_PATH=false`. -2. Within the Jupyter service, open a terminal and update conda and pip to have an up-to-date pip resolver. - -``` -$CONDA_HOME/bin/conda install -y conda=23.1.0 -$CONDA_HOME/bin/conda install -y 'pip>=22.0' -``` -3. If you wish to upgrade to python 3.9, create a new conda env and activate it: -``` -conda create -n python39 python=3.9 ipykernel -y -conda activate python39 -``` -4. Install mlrun: -``` -./align_mlrun.sh -``` - ### New and updated features #### Feature store @@ -203,6 +181,7 @@ These APIs will be removed from the v1.5.0 code. A FutureWarning appears if you | `init_functions` in pipelines | Add the function initialization to the pipeline code instead | | The entire `mlrun/mlutils` library | `mlrun.framework` | | `run_pipeline` | `project.run` | +| `user_project` | Use `get_or_create_project` or `load_project` to configure the active project. | **REST APIs deprecated and removed from v1.3.0 code** diff --git a/docs/cheat-sheet.md b/docs/cheat-sheet.md index af9987d34c..707fb0ec96 100644 --- a/docs/cheat-sheet.md +++ b/docs/cheat-sheet.md @@ -300,8 +300,8 @@ Docs: [Nuclio Triggers](https://github.com/nuclio/nuclio-jupyter/blob/developmen import nuclio serve = mlrun.import_function('hub://v2_model_server') -# HTTP trigger -serve.with_http(workers=8, port=31010, worker_timeout=10) +# Set amount of workers +serve.with_http(workers=8, worker_timeout=10) # V3IO stream trigger serve.add_v3io_stream_trigger(stream_path='v3io:///projects/myproj/stream1', name='stream', group='serving', seek_to='earliest', shards=1) @@ -317,6 +317,11 @@ serve.add_trigger("cron_interval", spec=nuclio.CronTrigger(interval="10s")) serve.add_trigger("cron_schedule", spec=nuclio.CronTrigger(schedule="0 9 * * *")) ``` +```{admonition} Note +The worker uses separate worker scope. 
This means that each worker has a copy of the variable, +and all changes are kept within the worker (changes made by worker x do not affect worker y). +``` + ### Building Docker images Docs: [Build function image](./runtimes/image-build.html), [Images and their usage in MLRun](./runtimes/images.html#images-usage) diff --git a/docs/concepts/notifications.md b/docs/concepts/notifications.md index b26820f6fa..8440858565 100644 --- a/docs/concepts/notifications.md +++ b/docs/concepts/notifications.md @@ -35,6 +35,8 @@ These cases are: notifications mechanism. This means you need to watch the pipeline in order for its notifications to be sent. - Dask: Dask runs are always local (against a remote dask cluster), so the notifications are sent locally as well. +> **Disclaimer:** Local notifications aren't persisted in the MLRun API + ## Notification Params and Secrets The notification parameters might contain sensitive information (slack webhook, git token, etc.). For this reason, when a notification is created its params are masked in a kubernetes secret. The secret is named @@ -79,6 +81,7 @@ For pipelines, you configure the notifications on the project notifiers. For exa ```python project.notifiers.add_notification(notification_type="slack",params={"webhook":""}) +project.notifiers.add_notification(notification_type="git", params={"repo": "", "issue": "", "token": ""}) ``` Instead of passing the webhook in the notification params, it is also possible in a Jupyter notebook to use the ` %env` magic command: @@ -86,6 +89,12 @@ magic command: %env SLACK_WEBHOOK= ``` +Editing and removing notifications is done similarly with the following methods: +```python +project.notifiers.edit_notification(notification_type="slack",params={"webhook":""}) +project.notifiers.remove_notification(notification_type="slack") +``` + ## Setting Notifications on Live Runs You can set notifications on live runs via the `set_run_notifications` method.
For example: diff --git a/docs/contents.rst b/docs/contents.rst index eff70d5ec6..08cf7b8400 100644 --- a/docs/contents.rst +++ b/docs/contents.rst @@ -31,6 +31,7 @@ Table of Contents development/index deployment/index monitoring/index + projects/ci-cd-automate .. toctree:: :maxdepth: 1 diff --git a/docs/install/remote.md b/docs/install/remote.md index b070944941..f1e92129d7 100644 --- a/docs/install/remote.md +++ b/docs/install/remote.md @@ -5,7 +5,8 @@ You can write your code on a local machine while running your functions on a rem **In this section** - [Prerequisites](#prerequisites) -- [Set up client environment](#set-up-client-environment) +- [Set up a Python 3.7 client environment](#set-up-a-python-3-7-client-environment-iguazio-versions-up-to-and-including-v3-5-2) +- [Set up a Python 3.9 client environment](#set-up-a-python-3-9-client-environment) - [Configure remote environment](#configure-remote-environment) - [Using `mlrun config set` command in MLRun CLI](#using-mlrun-config-set-command-in-mlrun-cli) - [Using `mlrun.set_environment` command in MLRun SDK](#using-mlrun-set-environment-command-in-mlrun-sdk) @@ -17,10 +18,43 @@ You can write your code on a local machine while running your functions on a rem Before you begin, ensure that the following prerequisites are met: Applications: -- Python 3.9 +- Python 3.9 (or Python 3.7 for Iguazio versions up to and including v3.5.2) - Recommended pip 22.x+ -## Set up client environment +The MLRun server is now based on a Python 3.9 environment. It's recommended to move the client to a Python 3.9 environment as well. + +MLRun v1.3.x maintains support for mlrun base images that are based on a python 3.7 environment. To differentiate between the images, the images based on +python 3.7 have the suffix: `-py37`. The correct version is automatically chosen for the built-in MLRun images according to the Python version of the MLRun client (for example, a 3.7 Jupyter gets the `-py37` images). 
+ + +## Set up a Python 3.7 client environment (Iguazio versions up to and including v3.5.2) + +```{admonition} Note +There is a known bug with nbformat on the Jupyter version in Iguazio up to and including v3.5.2, +which requires upgrading nbformat to 5.7.0. When using an older nbformat, some Jupyter Notebooks do not open. +``` + +To install on a **Python 3.7** environment (and optionally upgrade to a Python 3.9 environment): + +1. Configure the Jupyter service with the env variable `JUPYTER_PREFER_ENV_PATH=false`. +2. Within the Jupyter service, open a terminal and update conda and pip to have an up-to-date pip resolver. + +``` +$CONDA_HOME/bin/conda install -y conda=23.1.0 +$CONDA_HOME/bin/conda install -y 'pip>=22.0' +$CONDA_HOME/bin/conda install -y nbformat=5.7.0 +``` +3. If you want to upgrade to a Python 3.9 environment, create a new conda env and activate it: +``` +conda create -n python39 python=3.9 ipykernel -y +conda activate python39 +``` +4. Install mlrun: +``` +./align_mlrun.sh +``` + +## Set up a Python 3.9 client environment 1. **Basic**
Run ```pip install mlrun``` diff --git a/docs/monitoring/model-monitoring-deployment.ipynb b/docs/monitoring/model-monitoring-deployment.ipynb index ebffbe5307..2ef6328fa3 100644 --- a/docs/monitoring/model-monitoring-deployment.ipynb +++ b/docs/monitoring/model-monitoring-deployment.ipynb @@ -245,55 +245,40 @@ "* **Predictions/s** — the model predictions per second displayed in 5 second intervals for 5 minutes (rolling)\n", "* **Predictions Count** — the number of predictions the model makes for 5 minutes and 1 hour rolling windows\n", "\n", - "### Configuring Grafana dashboards\n", + "### Configuring Grafana datasources\n", "Verify that you have a Grafana service running in your Iguazio MLOps Platform.\n", - "If you do not have a Grafana service running,\n", - "see Adding Grafana Dashboards to create and configure it. When you create the service: In the **Custom Parameters** tab, **Platform data-access user** parameter, select a user with access to the `/user/pipelines` directory.\n", + "If you do not have a Grafana service running, please follow Creating a Service to add it to your platform.\n", + " When you create the service: In the **Custom Parameters** tab, **Platform data-access user** parameter, select a user with access to the `/user/pipelines` directory.\n", "\n", - "**For working with Iguazio 3.0.x:**\n", - "1. Make sure you have the `model-monitoring` as a Grafana data source configured in your Grafana service. If not,\n", - "add it by:\n", - " 1. Open your grafana service.\n", + "In addition, you will have to add access keys to your model-monitoring data source:\n", + " 1. Open your Grafana service.\n", " 2. Navigate to **Configuration | Data Sources**.\n", - " 3. Press **Add data source**.\n", - " 4. 
Select the **SimpleJson** datasource and configure the following parameters.\n", - " ```\n", - " Name: model-monitoring\n", - " URL: http://mlrun-api:8080/api/grafana-proxy/model-endpoints\n", - " Access: Server (default)\n", - "\n", - " ## Add a custom header of:\n", - " X-V3io-Session-Key: \n", - " ```\n", + " 3. Press **model-monitoring**.\n", + " 4. In Custom HTTP Headers, configure the cookie parameter. Set the value of `cookie` to:\n", + " `session=j:{\"sid\": \"\"}`\n", " 5. Press **Save & Test** for verification. You'll receive a confirmation with either a success or a failure message.\n", "\n", - "2. Download the following monitoring dashboards:\n", - " * {download}`Model Monitoring - Overview <./dashboards/model-monitoring-overview.json>`\n", - " * {download}`Model Monitoring - Details <./dashboards/model-monitoring-details.json>`\n", - " * {download}`Model Monitoring - Performance <./dashboards/model-monitoring-performance.json>`\n", + "\"Grafana
\n", "\n", - "3. Import the downloaded dashboards to your Grafana service:\n", - " 1. Navigate to your Grafana service in the Services list and press it.\n", - " 2. Press the dashboards icon in left menu.\n", - " 3. In the Dashboard Management screen, press **IMPORT**, and select one file to import. Repeat this step for each dashboard.\n", "\n", - "**For working with Iguazio 3.2.x and later:**\n", - "Add access keys to your model-monitoring data source:\n", - " 1. Open your Grafana service.\n", - " 2. Navigate to **Configuration | Data Sources**.\n", - " 3. Press **mlrun-model-monitoring**.\n", - " 4. In Custom HTTP Headers, configure the cookie parameter. Set the value of `cookie` to:" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "`session=j:{\"sid\": \"\"}`" + "### Configuring Grafana dashboards\n", + "From Iguazio 3.5.3, the overview, details, and performance dashboards can be found under **Dashboards | Manage | private**.\n", + "You can also import the latest dashboards versions by downloading them using the following links:\n", + "\n", + "**Iguazio 3.5.3**\n", + " * {download}`Model Monitoring - Overview <./dashboards/model-monitoring-overview.json>`\n", + " * {download}`Model Monitoring - Details <./dashboards/model-monitoring-details.json>`\n", + " * {download}`Model Monitoring - Performance <./dashboards/model-monitoring-performance.json>`\n", + "\n", + "**Iguazio 3.5.2 and older**\n", + " * {download}`Model Monitoring - Overview <./dashboards/iguazio-3.5.2-and-older/model-monitoring-overview.json>`\n", + " * {download}`Model Monitoring - Details <./dashboards/iguazio-3.5.2-and-older/model-monitoring-details.json>`\n", + " * {download}`Model Monitoring - Performance <./dashboards/iguazio-3.5.2-and-older/model-monitoring-performance.json>`\n", + "\n", + "Upload dashboards to your Grafana service by:\n", + " 1. Navigate to your Grafana service in the Services list and press it.\n", + " 2.
Press the dashboards icon in left menu.\n", + " 3. In the Dashboard Management screen, press **IMPORT**, and select one file to import. Repeat this step for each dashboard.\n" ] }, { @@ -304,7 +289,7 @@ } }, "source": [ - "The overview, details, and performance dashboards are in **Dashboards | Manage | private**\n", + "\n", "\n", "```{note}\n", "You need to train and deploy a model to see results in the dashboards.\n", diff --git a/docs/projects/automate-project-git-source.ipynb b/docs/projects/automate-project-git-source.ipynb new file mode 100644 index 0000000000..2ac1cc87cb --- /dev/null +++ b/docs/projects/automate-project-git-source.ipynb @@ -0,0 +1,2056 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "596f9a2a", + "metadata": {}, + "source": [ + "(automate-project-git-source)=\n", + "# Create a project using a Git source\n" + ] + }, + { + "cell_type": "markdown", + "id": "a19681b6", + "metadata": {}, + "source": [ + "This example shows how to create a project in MLRun for CI/CD automation based on a remote source, in this case Git.\n", + "The process is equivalent to using tar.gz, zip archive files. This example assumes you have functions that are ready \n", + "to run, and that you already cloned the files to your local file system.
" + ] + }, + { + "cell_type": "markdown", + "id": "694b3829", + "metadata": {}, + "source": [ + "The flow covers these steps to create a CI/CD project in MLRun:\n", + "1. [Before you start](#before-you-start)\n", + "2. [Creating a project](#creating-a-project)\n", + "2. [Setting a project source: remote or archive](#setting-a-project-source-either-remote-or-archive)\n", + "3. [Setting and running functions using Git source code](#setting-and-running-functions-using-git-source-code)\n", + "4. [Setting a workflow](#setting-a-workflow)\n", + "4. [Running a workflow using a Git source](#running-a-workflow-using-a-git-source)\n", + "5. [Setting and registering the project artifacts](#setting-and-registering-the-project-artifacts)\n", + "6. [Create and save the project YAML](#create-and-save-the-project-yaml)\n", + "7. [Creating and pushing changes to your Git repo or archive file](#creating-and-pushing-changes-to-your-git-repo-or-archive-file)" + ] + }, + { + "cell_type": "markdown", + "id": "44d0d6dc", + "metadata": {}, + "source": [ + "## Before you start" + ] + }, + { + "cell_type": "markdown", + "id": "461c29a2", + "metadata": {}, + "source": [ + "Install MLRun. If MLRun is not installed use ``pip install mlrun==`` or
``sh align_mlrun.sh`` \n", + "(the default MLRun installer that automatically installs the server version).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "cc9586ab", + "metadata": {}, + "outputs": [], + "source": [ + "import mlrun" + ] + }, + { + "cell_type": "markdown", + "id": "f0317e09", + "metadata": {}, + "source": [ + "Before running this notebook, clone the Git repo to your local machine." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c7eb4967", + "metadata": {}, + "outputs": [], + "source": [ + "# delete the clone folder if exists\n", + "!rm -rf ./clone" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "61b6eea1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cloning into './clone'...\n", + "remote: Enumerating objects: 209, done.\u001b[K\n", + "remote: Counting objects: 100% (209/209), done.\u001b[K\n", + "remote: Compressing objects: 100% (150/150), done.\u001b[K\n", + "remote: Total 209 (delta 118), reused 129 (delta 53), pack-reused 0\u001b[K\n", + "Receiving objects: 100% (209/209), 162.20 KiB | 1.65 MiB/s, done.\n", + "Resolving deltas: 100% (118/118), done.\n" + ] + } + ], + "source": [ + "# clone the repo to your local machine\n", + "!git clone https://github.com/your repo ./clone" + ] + }, + { + "cell_type": "markdown", + "id": "609a1704", + "metadata": {}, + "source": [ + "## Creating a project" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "26f79d2e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:04:46,772 [info] loaded project new-ci-cd-proj from MLRun DB\n" + ] + } + ], + "source": [ + "# Create a new project or load it from DB\n", + "project = mlrun.get_or_create_project(name=\"new-ci-cd-proj\", context=\"./clone\")" + ] + }, + { + "cell_type": "markdown", + "id": "f4a6570a", + "metadata": {}, + "source": [ + "or for loading from a 
private repo:\n", + "\n", + "```\n", + "# project = mlrun.get_or_create_project(name='new-ci-cd-proj',context='./',init_git=True,secrets={\"GIT_TOKEN\":})\n", + "```\n", + "\n", + "See more details in {py:class}`~mlrun.projects.get_or_create_project` and {ref}`secrets`." + ] + }, + { + "cell_type": "markdown", + "id": "3a8c8c00", + "metadata": {}, + "source": [ + "When you create a new project, MLRun creates a light project YAML, for example: \n", + "````\n", + "kind: project\n", + "metadata:\n", + " name: new-ci-cd-proj\n", + " created: '2022-06-30T09:41:05.612000'\n", + "spec:\n", + " functions: []\n", + " workflows: []\n", + " artifacts: []\n", + " desired_state: online\n", + "status:\n", + " state: online\n", + "````\n", + "\n", + "As you proceed, more information (project metadata) is added to the project YAML." + ] + }, + { + "cell_type": "markdown", + "id": "b10081bc", + "metadata": {}, + "source": [ + "## Setting a project source, either remote or archive " + ] + }, + { + "cell_type": "markdown", + "id": "5697340c", + "metadata": {}, + "source": [ + "Define the project source, and optionally `pull_at_runtime` flag value, and the project working dir, and add them to \n", + "the `project.yaml` by using the {py:class}`~mlrun.projects.MlrunProject.set_source` method. \n", + "To copy these values also to the functions spec, set `with_repo=True` in the `project.set_function` method. \n", + "\n", + "- If `pull_at_runtime=True` MLRun loads the git/archive repo into the function container at run time and does not require a build. (This is simpler when developing, for production it’s preferable to build the image with the code.)\n", + "- If `pull_at_runtime` is **not set to `True`**, you need to deploy the functions (with `function.deploy()`) to build a container.\n", + "\n", + "See more about `pull_at_runtime` in [Loading the code from container vs. loading the code at runtime](./git-best-practices.html#loading-the-code-from-container-vs-loading-the-code-at-runtime).
\n", + "See also {py:class}`KubejobRuntime.with_source_archive ` and {py:class}`RemoteRuntime.with_source_archive `." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ac7f7aed", + "metadata": {}, + "outputs": [], + "source": [ + "# Add the git branch or references to the source e.g.: 'git://url/org/repo.git#`.\n", + "source = \"git://github.com/mlrun/ci-cd-tutorial.git\"" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "d735fcac", + "metadata": {}, + "outputs": [], + "source": [ + "# Set the project source\n", + "project.set_source(source=source, pull_at_runtime=True)" + ] + }, + { + "cell_type": "markdown", + "id": "0ac2414b", + "metadata": {}, + "source": [ + "## Setting and running functions using Git source code" + ] + }, + { + "cell_type": "markdown", + "id": "551d1467", + "metadata": {}, + "source": [ + "This section describes: [fetching the data](#fetching-the-functions-data); [running the function](#running-the-function); [training the model using the fetched data](#training-the-model); and [serving the function](#serving-the-function).\n", + "\n", + "The code source files are usually stored under a folder named `./src` in the project context,
\n", + "for example: `./project-context/src/data_fetch.py`" + ] + }, + { + "cell_type": "markdown", + "id": "43e698b8", + "metadata": {}, + "source": [ + "### Setting the project's functions\n", + "\n", + "To set the function's definitions, use the {py:class}`~mlrun.projects.MlrunProject.set_function` method. \n", + "This sets the function's metadata in the project YAML, for example: \n", + "function source (YAML, py, ipynb, function object), name of the function, function handler, function image, \n", + "function kind, and function requirements.\n", + "\n", + "See more details in {py:class}`~mlrun.projects.MlrunProject.set_function`." + ] + }, + { + "cell_type": "markdown", + "id": "671d5f4e", + "metadata": {}, + "source": [ + "### Fetching the function's data" + ] + }, + { + "cell_type": "markdown", + "id": "0a2c81b7", + "metadata": {}, + "source": [ + "```{admonition} Tip: Using tags \n", + "This example includes a `tag` value that is used as the Git tag for the release after completing the development. The tag \n", + "must be added manually to the function. 
(This tag is internal to MLRun and is not taken from Git.)\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "9769cdd7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Set data_fetch function to the project.yaml file\n", + "project.set_function(\n", + " func=\"./src/data_fetch.py\",\n", + " name=\"data-fetch\",\n", + " handler=\"data_fetch\",\n", + " kind=\"job\",\n", + " image=\"mlrun/mlrun\",\n", + " with_repo=True,\n", + " tag=\"v4\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e74fad55", + "metadata": {}, + "source": [ + "### Running the function\n", + "\n", + "After you set the function in the project, get the function object with the \n", + "{py:class}`~mlrun.projects.MlrunProject.get_function` method.\n", + "\n", + "\n", + "```{admonition} Tip: Change the function spec with get_function\n", + "You can use the `get_function` method to change the function spec. For example, if you \n", + "change the function resources and then run the function, it runs with those changes and the changes are stored in \n", + "the project object cache:\n", + "\n", + " \n", + " data_fetch_func = mlrun.get_function('data-fetch')\n", + " data_fetch_func.with_requests(mem='1G',cpu=3)\n", + " data_fetch_run = project.run_function('data-fetch')\n", + " \n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "68b61b18", + "metadata": {}, + "source": [ + "Run the function using the {py:class}`~mlrun.projects.MlrunProject.run_function` method both to \n", + "[run jobs locally](#running-the-function-locally) \n", + "and, remotely on the [runtime/cluster](#running-the-function-remotely-on-your-cluster). If \n", + "there are any requirements, you need to build a new \n", + "image before you run a function. 
See more details in {ref}`build-function-image`.\n", + "\n", + "#### Running the function locally\n", + "\n", + "First, run the function using the code files from your project context folder on your local file system, for debugging the function. Once you are satisfied, continue with [Running the function remotely on your cluster](#running-the-function-remotely-on-your-cluster). \n", + "\n", + "To run the code locally, use `local=True`." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "baa498e5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:04:46,993 [info] Storing function: {'name': 'data-fetch-data-fetch', 'uid': 'a6e5cc8f573e41f6ae6ef1c049b6e50a', 'db': 'http://mlrun-api:8080'}\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
new-ci-cd-proj-shapira0May 17 09:04:47completeddata-fetch-data-fetch
v3io_user=shapira
kind=
owner=shapira
host=jupyter-shapira-7fc985f9db-cp8x9
release=v3
train-dataset
test-dataset
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/html": [ + " > to track results use the .show() or .logs() methods or click here to open in UI" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:04:50,355 [info] run executed, status=completed: {'name': 'data-fetch-data-fetch'}\n" + ] + } + ], + "source": [ + "data_fetch_run = project.run_function(\n", + " function=\"data-fetch\", returns=[\"train-dataset\", \"test-dataset\"], local=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a5953eab", + "metadata": {}, + "source": [ + "#### Running the function remotely on your cluster\n", + "\n", + "```{admonition} Tip: Using a relative handler\n", + "If your code is saved to your remote source, you can run the function from a remote source by adding \n", + "`with_repo=True`. You can also specify a relative handler (folder_name.file_name.function_handler) to point to the python \n", + "code file. (This paradigm does not support running functions in local.)\n", + "\n", + " \n", + " project.set_function(name=\\\"training\\\",\n", + " handler=\\\"function.model_training\\\",\n", + " image=\\\"mlrun/mlrun\\\", kind=\\\"job\\\",with_repo=True\n", + " )\n", + " \n", + "```\n", + "Use the code files from the remote project source (`local=False`)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "28a916b8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:04:50,395 [info] Storing function: {'name': 'data-fetch-data-fetch', 'uid': '860b9700cd3f4724a0669cb7c9732beb', 'db': 'http://mlrun-api:8080'}\n", + "> 2023-05-17 09:04:50,649 [info] Job is running in the background, pod: data-fetch-data-fetch-qd874\n", + "final state: completed\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
new-ci-cd-proj-shapira0May 17 09:04:54completeddata-fetch-data-fetch
v3io_user=shapira
kind=job
owner=shapira
mlrun/client_version=1.3.1-rc5
mlrun/client_python_version=3.7.6
host=data-fetch-data-fetch-qd874
release=v3
train-dataset
test-dataset
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/html": [ + " > to track results use the .show() or .logs() methods or click here to open in UI" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:05:03,834 [info] run executed, status=completed: {'name': 'data-fetch-data-fetch'}\n" + ] + } + ], + "source": [ + "data_fetch_run = project.run_function(\n", + " function=\"data-fetch\", returns=[\"train-dataset\", \"test-dataset\"], local=False\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "a7ccfb7c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'train-dataset': 'store://artifacts/new-ci-cd-proj-shapira/data-fetch-data-fetch_train-dataset:860b9700cd3f4724a0669cb7c9732beb',\n", + " 'test-dataset': 'store://artifacts/new-ci-cd-proj-shapira/data-fetch-data-fetch_test-dataset:860b9700cd3f4724a0669cb7c9732beb'}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_fetch_run.outputs" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "75b31f43", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...111213141516171819label
140.6554001.357176-0.3802522.2366120.102893-0.0386780.1010611.5557701.1167340.146883...-2.1323360.7397350.4526150.2994270.683967-0.0890780.609046-0.895865-0.5784051
270.8341070.572216-0.8727510.519342-1.1017980.2599350.398852-0.2994850.8211540.018271...-1.634293-0.6185841.354337-1.1362381.248243-0.5938050.2667411.1806651.2123831
79-2.0403703.446461-0.269668-0.8758621.3473291.4120330.7647142.1615310.390874-0.900138...-0.9041112.640541-2.483898-1.619484-3.6763580.704040-3.1920031.6695270.7820621
\n", + "

3 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 \\\n", + "14 0.655400 1.357176 -0.380252 2.236612 0.102893 -0.038678 0.101061 \n", + "27 0.834107 0.572216 -0.872751 0.519342 -1.101798 0.259935 0.398852 \n", + "79 -2.040370 3.446461 -0.269668 -0.875862 1.347329 1.412033 0.764714 \n", + "\n", + " 7 8 9 ... 11 12 13 14 \\\n", + "14 1.555770 1.116734 0.146883 ... -2.132336 0.739735 0.452615 0.299427 \n", + "27 -0.299485 0.821154 0.018271 ... -1.634293 -0.618584 1.354337 -1.136238 \n", + "79 2.161531 0.390874 -0.900138 ... -0.904111 2.640541 -2.483898 -1.619484 \n", + "\n", + " 15 16 17 18 19 label \n", + "14 0.683967 -0.089078 0.609046 -0.895865 -0.578405 1 \n", + "27 1.248243 -0.593805 0.266741 1.180665 1.212383 1 \n", + "79 -3.676358 0.704040 -3.192003 1.669527 0.782062 1 \n", + "\n", + "[3 rows x 21 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_fetch_run.artifact(\"train-dataset\").as_df().sample(3)" + ] + }, + { + "cell_type": "markdown", + "id": "619375eb", + "metadata": {}, + "source": [ + "### Training the model" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "5d2ac325", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "project.set_function(\n", + " func=\"./src/train.py\",\n", + " name=\"train\",\n", + " handler=\"train\",\n", + " kind=\"job\",\n", + " image=\"mlrun/mlrun\",\n", + " with_repo=True,\n", + " tag=\"v4\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "b1a3ac58", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:05:04,113 [info] Storing function: {'name': 'train-train', 'uid': '423d664e6e684b1fb9acc9e62189d5b4', 'db': 'http://mlrun-api:8080'}\n", + "> 2023-05-17 09:05:04,362 [info] Job is running in the background, pod: 
train-train-7z8z8\n", + "final state: completed\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
new-ci-cd-proj-shapira0May 17 09:05:08completedtrain-train
v3io_user=shapira
kind=job
owner=shapira
mlrun/client_version=1.3.1-rc5
mlrun/client_python_version=3.7.6
host=train-train-7z8z8
release=v3
train_data
test_data
accuracy=0.85
f1_score=0.8421052631578948
precision_score=1.0
recall_score=0.7272727272727273
feature-importance
test_set
confusion-matrix
roc-curves
calibration-curve
model
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/html": [ + " > to track results use the .show() or .logs() methods or click here to open in UI" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:05:25,610 [info] run executed, status=completed: {'name': 'train-train'}\n" + ] + } + ], + "source": [ + "train_run = project.run_function(\n", + " function=\"train\",\n", + " inputs={\n", + " \"train_data\": data_fetch_run.outputs[\"train-dataset\"],\n", + " \"test_data\": data_fetch_run.outputs[\"test-dataset\"],\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "74feafe7", + "metadata": {}, + "source": [ + "### Serving the function" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "a04b55d3", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "mlrun-flow\n", + "\n", + "\n", + "\n", + "_start\n", + "\n", + "start\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_start->\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "model\n", + "\n", + "model\n", + "\n", + "\n", + "\n", + "->model\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create a serving function object\n", + "serving = mlrun.new_function(\n", + " name=\"serving\", kind=\"serving\", image=\"mlrun/mlrun\", tag=\"v4\"\n", + ")\n", + "\n", + "# Add a model to the model serving function object\n", + "serving.add_model(\n", + " key=\"model\",\n", + " model_path=train_run.outputs[\"model\"],\n", + " class_name=\"mlrun.frameworks.sklearn.SklearnModelServer\",\n", + ")\n", + "\n", + "# Plot 
the serving graph\n", + "serving.spec.graph.plot(rankdir=\"LR\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "ddafe3c4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:05:25,815 [info] function spec saved to path: ././clone/function_spec/serving.yaml\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Save the function spec into a .yaml file and register it in the project\n", + "serving.export(target=f\"./{project.context}/function_spec/serving.yaml\")\n", + "project.set_function(func=\"./function_spec/serving.yaml\", name=\"serving\")" + ] + }, + { + "cell_type": "markdown", + "id": "4c2675db", + "metadata": {}, + "source": [ + "```{admonition} CI/CD configuration considerations\n", + "- When creating a serving function, the function spec contains metadata of the function steps or the serving function \n", + "models. Therefore, you need to create a function.yaml file by using the {py:class}`~mlrun.runtimes.BaseRuntime.export` \n", + "method that exports the function object to a yaml file (that is saved in the function_spec folder). Then set the function \n", + "with this yaml file. This approach saves all of the function spec for future deployments. (If you don't set the function yaml, you'll need to set the function steps or models to the function when loading the project.)\n", + "For example:
\n", + " \n", + " .export('./function_spec/model_training.yaml')\n", + " project.set_function(\n", + " func=\"training.yaml\",name='training',with_repo=True,kind='serving')\n", + " \n", + "- Additionally, if you want to change the default function spec values, e.g. resources, node-selector and more, and want to \n", + "make this change constant, you need to create a yaml function file and use the yaml function in the `set_function` method.\n", + "- When setting a nuclio function, the function handler is a combination of the `file_name:function_handler`, for example:\n", + " ```\n", + " project.set_function(name='nuclio',handler='multi:multi_3',kind='nuclio',image='mlrun/mlrun',with_repo=True)\n", + " ```\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "0b740ee1", + "metadata": {}, + "source": [ + "To deploy a remote function, e.g. serving and nuclio kinds, use the {py:class}`~mlrun.projects.MlrunProject.deploy_function` method." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "93d0b01e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:05:25,839 [info] Starting remote function deploy\n", + "2023-05-17 09:05:26 (info) Deploying function\n", + "2023-05-17 09:05:26 (info) Building\n", + "2023-05-17 09:05:26 (info) Staging files and preparing base images\n", + "2023-05-17 09:05:26 (info) Building processor image\n", + "2023-05-17 09:06:26 (info) Build complete\n", + "2023-05-17 09:06:34 (info) Function deploy complete\n", + "> 2023-05-17 09:06:37,264 [info] successfully deployed function: {'internal_invocation_urls': ['nuclio-new-ci-cd-proj-shapira-serving-v3.default-tenant.svc.cluster.local:8080'], 'external_invocation_urls': ['new-ci-cd-proj-shapira-serving-v3-new-ci-cd-proj-shapira.default-tenant.app.cust-cs-il-3-5-2.iguazio-cd2.com/']}\n" + ] + } + ], + "source": [ + "# Serve the function\n", + "serving_func = project.deploy_function(\n", + " function=\"serving\",\n", + " 
models=[\n", + " {\n", + " \"key\": \"model\",\n", + " \"model_path\": train_run.outputs[\"model\"],\n", + " \"class_name\": \"mlrun.frameworks.sklearn.SklearnModelServer\",\n", + " }\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "c2192d8d", + "metadata": {}, + "outputs": [], + "source": [ + "my_data = \"\"\"{\"inputs\":[[-0.60150011, 0.51150308, 0.25701239, -1.51777297, -1.82961288,\n", + " 0.22983693, -0.40761625, 0.82325082, 1.1779216 , 1.08424275,\n", + " -0.7031145 , -0.40608979, -0.36305977, 1.28075006, 0.94445967,\n", + " 1.19105828, 1.93498414, 0.69911167, 0.50759757, 0.91565635]]}\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "a391939c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:06:37,342 [info] invoking function: {'method': 'POST', 'path': 'http://nuclio-new-ci-cd-proj-shapira-serving-v3.default-tenant.svc.cluster.local:8080/'}\n" + ] + }, + { + "data": { + "text/plain": [ + "{'id': '8ca4f4ef-7765-4d50-8a43-1e77a15e433f',\n", + " 'model_name': 'model',\n", + " 'outputs': [1]}" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serving_func.function.invoke(\"/\", my_data)" + ] + }, + { + "cell_type": "markdown", + "id": "215c539d", + "metadata": {}, + "source": [ + "## Setting a workflow\n", + "\n", + "After you completed developing your functions (in this example: `data_fetch`, `training`, and `serving`), \n", + "continue with creating a workflow that runs those functions serially. For more information about workflows and an \n", + "example of a `workflow.py` file, see {ref}`build-run-workflows-pipelines`.\n", + "\n", + "To set a workflow to a project, use the {py:class}`~mlrun.projects.MlrunProject.set_workflow` method. 
This method adds or \n", + "updates a workflow, and specifies a name and the code path in the project.yaml file." + ] + }, + { + "cell_type": "markdown", + "id": "682bef8f", + "metadata": {}, + "source": [ + "This example adds a workflow named `main` that points to a file located in
`./< project-context >/src/workflow.py`" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "0aa3c6a4", + "metadata": {}, + "outputs": [], + "source": [ + "project.set_workflow(\"main\", \"./src/workflow.py\")" + ] + }, + { + "cell_type": "markdown", + "id": "8e49ce34", + "metadata": {}, + "source": [ + "## Running a workflow using a Git source\n", + "To run the workflow, use the {py:class}`~mlrun.projects.MlrunProject.run` method. With {py:class}`~mlrun.projects.MlrunProject.run` you can run a workflow \n", + "or schedule a workflow using Kubeflow Pipelines by specifying the workflow name or the workflow file path.\n", + "\n", + "To run the workflow remotely, set the `engine` parameter to `remote:local` or `remote:kfp`." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "6fd0aba7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Pipeline running (id=626a345a-b67f-4eb0-9a3b-4850185ada10), click here to view the details in MLRun UI
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kfp\n", + "\n", + "\n", + "\n", + "ci-cd-tutorial-r6bsx-1763204580\n", + "\n", + "\n", + "\n", + "\n", + "deploy-serving\n", + "\n", + "\n", + "\n", + "ci-cd-tutorial-r6bsx-3399705660\n", + "\n", + "train\n", + "\n", + "\n", + "\n", + "ci-cd-tutorial-r6bsx-3399705660->ci-cd-tutorial-r6bsx-1763204580\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "ci-cd-tutorial-r6bsx-686534511\n", + "\n", + "data-fetch\n", + "\n", + "\n", + "\n", + "ci-cd-tutorial-r6bsx-686534511->ci-cd-tutorial-r6bsx-3399705660\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "

Run Results

[info] Workflow 626a345a-b67f-4eb0-9a3b-4850185ada10 finished, state=Succeeded


click the hyper links below to see detailed results
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uidstartstatenameparametersresults
May 17 09:07:23completedtrain
accuracy=1.0
f1_score=1.0
precision_score=1.0
recall_score=1.0
May 17 09:06:53completeddata-fetch
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "626a345a-b67f-4eb0-9a3b-4850185ada10" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Run the workflow named main and wait for pipeline completion (watch=True)\n", + "project.run(\"main\", watch=True, engine=\"remote:kfp\")" + ] + }, + { + "cell_type": "markdown", + "id": "50678486", + "metadata": {}, + "source": [ + "### Running a scheduled workflow\n", + "\n", + "For more information about scheduling workflows, see {ref}`scheduled-jobs`." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "d4fc3f98", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:09:34,482 [warning] WARNING!, you seem to have uncommitted git changes, use .push()\n", + "> 2023-05-17 09:09:34,485 [info] executing workflow scheduling 'workflow-runner-main' remotely with kfp engine\n", + "> 2023-05-17 09:09:34,489 [info] Storing function: {'name': 'main', 'uid': '88a2eecd5cd14c339529f2c7ced3a011', 'db': 'http://mlrun-api:8080'}\n", + "> 2023-05-17 09:09:34,854 [info] task schedule modified: {'schedule': '0 * * * *', 'project': 'new-ci-cd-proj-shapira', 'name': 'main'}\n" + ] + } + ], + "source": [ + "project.run(\"main\", schedule=\"0 * * * *\")" + ] + }, + { + "cell_type": "markdown", + "id": "d65de8ea", + "metadata": {}, + "source": [ + "## Setting and registering the project artifacts" + ] + }, + { + "cell_type": "markdown", + "id": "aa57e2f4", + "metadata": {}, + "source": [ + "To register artifacts to a project, use the {py:class}`~mlrun.projects.MlrunProject.set_artifact` method. By adding/setting an artifact in the project spec, they are registered upon loading the project. 
\n", + "In general, use this method when you want to register an artifact when loading a project, for example:\n", + "* You developed a model artifact in the development system and you want to use this model file in production.\n", + "* There are artifacts you want to register by default when you load or create a project.\n", + "\n", + "```{admonition} Registering artifacts in multiple environments\n", + "To register artifacts in multiple environments, for example dev and production, you must upload your artifacts to a remote storage e.g. S3. You can change your project artifact path using the MLRun UI or MLRun, for example:\n", + "```\n", + "```\n", + "project.artifact_path='s3:" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Export the yaml file and save the project\n", + "project.save()" + ] + }, + { + "cell_type": "markdown", + "id": "627c8ce6", + "metadata": {}, + "source": [ + "## Creating and pushing changes to your Git repo or archive file" + ] + }, + { + "cell_type": "markdown", + "id": "7ed284b8", + "metadata": {}, + "source": [ + "### Creating a Git remote\n", + "\n", + "If you do not clone any files and you do not have any git remotes configured in your local folder you can use {py:class}`~mlrun.projects.MlrunProject.create_remote`. 
This method creates a git remote and adds the remote to the project as the project source.\n", + "\n", + "For example:\n", + "```\n", + "project.create_remote(url='https://github.com/mlrun/example-ci-cd.git',name='mlrun-remote',branch='master')\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "a765e83f", + "metadata": {}, + "source": [ + "### Pushing changes to the Git repo\n", + "\n", + "After you make changes to your code, push your project context to the GitHub repo using {py:class}`~mlrun.projects.MlrunProject.push`.\n", + "```\n", + "project.push(branch='master',message='update',add=['project.yaml','./src/data_fetch.py','./src/serving.yaml','./src/train.py','./src/workflow.py'])\n", + "```" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/projects/ci-cd-automate.md b/docs/projects/ci-cd-automate.md new file mode 100644 index 0000000000..9adaa64a1e --- /dev/null +++ b/docs/projects/ci-cd-automate.md @@ -0,0 +1,11 @@ +(ci-cd-automate)= +# CI/CD automation with Git + +**In this section** + +```{toctree} +:maxdepth: 1 + +automate-project-git-source +load-project-yaml +``` \ No newline at end of file diff --git a/docs/projects/ci-integration.md b/docs/projects/ci-integration.md index c027d94190..793abf4ea7 100644 --- a/docs/projects/ci-integration.md +++ b/docs/projects/ci-integration.md @@ -1,5 +1,5 @@ (ci-integration)= -# CI/CD integration +# Run pipelines with Github Actions, GitLab You can run your ML Pipelines using CI frameworks like Github Actions, GitLab CI/CD, etc. MLRun supports a simple and native integration with the CI systems. 
diff --git a/docs/projects/git-best-practices.ipynb b/docs/projects/git-best-practices.ipynb index 6f0c77ba2c..b21c11c191 100644 --- a/docs/projects/git-best-practices.ipynb +++ b/docs/projects/git-best-practices.ipynb @@ -30,7 +30,10 @@ "source": [ "```{admonition} Note\n", "This section assumes basic familiarity with version control software such as GitHub, GitLab, etc. If you're new to Git and version control, see the [GitHub Hello World documentation](https://docs.github.com/en/get-started/quickstart/hello-world).\n", - "```" + "```\n", + "\n", + "**See also**\n", + "- {ref}`ci-cd-automate`" ] }, { @@ -89,7 +92,7 @@ "MLRun supports two approaches to loading the code from Git:\n", "\n", "- Loading the code from container (default behavior)
\n", - "The image for the MLRun function is built once, and consumes the code in the repo. **This is the preferred approach for production workloads**. For example:\n", + "Before using this option, you must build the function with the {py:class}`~mlrun.projects.MlrunProject.build_function` method. The image for the MLRun function is built once, and consumes the code in the repo. **This is the preferred approach for production workloads**. For example:\n", "\n", "```python\n", "project.set_source(source=\"git://github.com/mlrun/project-archive.git\")\n", diff --git a/docs/projects/load-project-yaml.ipynb b/docs/projects/load-project-yaml.ipynb new file mode 100644 index 0000000000..5816f9da9a --- /dev/null +++ b/docs/projects/load-project-yaml.ipynb @@ -0,0 +1,1343 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "268d7741", + "metadata": {}, + "source": [ + "(load-project-yaml-from-git)=\n", + "# Load project YAML from Git, Zip, Tar source" + ] + }, + { + "cell_type": "markdown", + "id": "f263391c", + "metadata": {}, + "source": [ + "After you create your project and you have a project.yaml file with all the necessary metadata within the remote source (Git, zip or tar.gz file), you can simply load that project and run, build, and deploy your functions and workflows.\n", + "\n", + "Run the project automation in {ref}`automate-project-git-source` before you run this workbook." + ] + }, + { + "cell_type": "markdown", + "id": "48e13297", + "metadata": {}, + "source": [ + "This notebook presents the steps to load a CI/CD project in MLRun:\n", + "1. [Load a project from a remote URL](#load-a-project-from-a-remote-url)\n", + "2. [Get a function object](#get-a-function-object)\n", + "3. [Run project functions](#run-project-functions)\n", + "4. [Deploy project functions](#deploy-project-functions)\n", + "5. 
[Run the project workflow](#run-the-project-workflow)" + ] + }, + { + "cell_type": "markdown", + "id": "d5ca2246", + "metadata": {}, + "source": [ + "Install mlrun using ``pip install mlrun==<mlrun server version>`` or ``sh align_mlrun.sh`` (the default mlrun installer that automatically installs the server version)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "0bcb3a5a", + "metadata": {}, + "outputs": [], + "source": [ + "import mlrun" + ] + }, + { + "cell_type": "markdown", + "id": "ea823444", + "metadata": {}, + "source": [ + "## Load a project from a remote URL\n", + "\n", + "This method can be used for loading an MLRun project from yaml/zip/tar/git/dir or from the MLRun DB." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ae2174b2", + "metadata": {}, + "outputs": [], + "source": [ + "# project source to load from -'git://url/org/repo.git#`.\n", + "source = \"git://github.com/mlrun/ci-cd-tutorial.git#refs/tags/v3\"" + ] + }, + { + "cell_type": "markdown", + "id": "4d3bc0fe", + "metadata": {}, + "source": [ + "**Note -** Add the git branch or refs to the source e.g.: 'git://<url>/org/repo.git#<branch-name or refs/heads/..>'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "bf0977b5", + "metadata": {}, + "outputs": [], + "source": [ + "# load the project\n", + "project = mlrun.load_project(\n", + " \"./clone\", url=source, clone=True, name=\"my-load-proj\", user_project=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "94de1a60", + "metadata": {}, + "source": [ + "For example:\n", + "```\n", + "# when loading from private repo\n", + "project = mlrun.get_or_create_project(name='new-ci-cd-proj',context='./',init_git=True,secrets={\"GIT_TOKEN\":<git_token>})\n", + "# when running functions in a project from a private repo\n", + "project.set_secrets({\"GIT_TOKEN\":<git_token>})\n", + "```\n", + "\n", + "See {py:class}`mlrun.projects.load_project`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "d1dd9bc7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "kind: project\n", + "metadata:\n", + " name: my-load-proj-shapira\n", + " created: '2023-04-17T13:27:10.756000'\n", + "spec:\n", + " functions:\n", + " - url: ./src/data_fetch.py\n", + " name: data-fetch\n", + " kind: job\n", + " image: mlrun/mlrun\n", + " handler: data_fetch\n", + " with_repo: true\n", + " tag: v2\n", + " - url: ./src/train.py\n", + " name: train\n", + " kind: job\n", + " image: mlrun/mlrun\n", + " handler: train\n", + " with_repo: true\n", + " tag: v2\n", + " - url: ./function_spec/serving.yaml\n", + " name: serving\n", + " workflows:\n", + " - path: ./src/workflow.py\n", + " name: main\n", + " artifacts:\n", + " - kind: model\n", + " metadata:\n", + " project: new-ci-cd-proj-shapira\n", + " key: model-test\n", + " spec:\n", + " target_path: v3io:///projects/new-ci-cd-proj-shapira/artifacts/a5d545c6-fd5d-44e8-966c-24b9261314be/train/0/model/\n", + " model_file: model.pkl\n", + " status:\n", + " state: created\n", + " conda: ''\n", + " source: git://github.com/GiladShapira94/example-ci-cd.git#refs/heads/v2\n", + " origin_url: git://github.com/GiladShapira94/example-ci-cd.git#refs/heads/v2\n", + " load_source_on_run: true\n", + " desired_state: online\n", + "status:\n", + " state: online\n", + "\n" + ] + } + ], + "source": [ + "# print project yaml\n", + "print(project.to_yaml())" + ] + }, + { + "cell_type": "markdown", + "id": "4676c527", + "metadata": {}, + "source": [ + "## Get a function object \n", + "Get the function object using the {py:class}`~mlrun.projects.MlrunProject.get_function` method.\n", + "\n", + "This method allows you to get a function object based on the metadata in your project YAML file or from MLRun DB.\n", + "````\n", + "serving_func = project.get_function('')\n", + "````" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": 
"37743f5b", + "metadata": {}, + "outputs": [], + "source": [ + "serving_func = project.get_function(\"serving\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "035cf688", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serving_func.add_model(\n", + " key=\"model\",\n", + " model_path=train_run.outputs[\"model\"],\n", + " class_name=\"mlrun.frameworks.sklearn.SklearnModelServer\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "8acbf7c2", + "metadata": {}, + "source": [ + "```{admonition} Tip: Changing the model file path\n", + "This serving function points to a model file whose path is stored in the function spec. If you want to change it (for example, to use a newer model file) you need to add the model to the function object and then deploy the function, or alternately, change the function.yaml in the remote source:\n", + "\n", + "\n", + " serving_func = project.get_function('serving')\n", + " serving_func.add_model(key='model',model_path=train_run.outputs[\"model\"],\n", + " class_name='mlrun.frameworks.sklearn.SklearnModelServer')\n", + " serving_dep = project.deploy_function('serving')\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "89ea2afb", + "metadata": {}, + "source": [ + "Test your serving function locally before deploying it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "0ecd1cf6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:19:19,976 [warning] run command, file or code were not specified\n", + "> 2023-05-17 09:19:20,579 [info] model model was loaded\n", + "> 2023-05-17 09:19:20,580 [info] Loaded ['model']\n" + ] + } + ], + "source": [ + "serving_server = serving_func.to_mock_server()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "bd901ebd", + "metadata": {}, + "outputs": [], + "source": [ + "my_data = \"\"\"{\"inputs\":[[-0.60150011, 0.51150308, 0.25701239, -1.51777297, -1.82961288,\n", + " 0.22983693, -0.40761625, 0.82325082, 1.1779216 , 1.08424275,\n", + " -0.7031145 , -0.40608979, -0.36305977, 1.28075006, 0.94445967,\n", + " 1.19105828, 1.93498414, 0.69911167, 0.50759757, 0.91565635]]}\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "fc3766cc", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "X does not have valid feature names, but GradientBoostingClassifier was fitted with feature names\n" + ] + }, + { + "data": { + "text/plain": [ + "{'id': '70c310d8fc10420fa9887546623b0ee0',\n", + " 'model_name': 'model',\n", + " 'outputs': [1]}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serving_server.test(\"/\", my_data)" + ] + }, + { + "cell_type": "markdown", + "id": "a98c2ddc", + "metadata": {}, + "source": [ + "## Run project functions \n", + "Run the function using the {py:class}`~mlrun.projects.MlrunProject.run_function` method both to \n", + "[run jobs locally](./automate-project-git-source.html#running-the-function-locally) \n", + "and, run remotely on the [runtime/cluster](./automate-project-git-source.html#running-the-function-remotely-on-your-cluster). 
If \n", + "there are any requirements you need to build a new \n", + "image before you run a function. See more details in {ref}`build-function-image`." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e9c7fdd5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:15:38,824 [info] Storing function: {'name': 'data-fetch-data-fetch', 'uid': '5bd1b1e535894b1385ed1d6d33180741', 'db': 'http://mlrun-api:8080'}\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
my-load-proj-shapira0May 17 09:15:38completeddata-fetch-data-fetch
v3io_user=shapira
kind=
owner=shapira
host=jupyter-shapira-7fc985f9db-cp8x9
release=v2
train-dataset
test-dataset
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/html": [ + " > to track results use the .show() or .logs() methods or click here to open in UI" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:15:42,712 [info] run executed, status=completed: {'name': 'data-fetch-data-fetch'}\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "project.run_function(\n", + " function=\"data-fetch\", local=True, returns=[\"train-dataset\", \"test-dataset\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ba32d177", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:15:42,766 [info] Storing function: {'name': 'data-fetch-data-fetch', 'uid': 'bb814e47e2cd433b8820f19c782fb8af', 'db': 'http://mlrun-api:8080'}\n", + "> 2023-05-17 09:15:43,048 [info] Job is running in the background, pod: data-fetch-data-fetch-q774n\n", + "final state: completed\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
my-load-proj-shapira0May 17 09:15:47completeddata-fetch-data-fetch
v3io_user=shapira
kind=job
owner=shapira
mlrun/client_version=1.3.1-rc5
mlrun/client_python_version=3.7.6
host=data-fetch-data-fetch-q774n
release=v2
train-dataset
test-dataset
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/html": [ + " > to track results use the .show() or .logs() methods or click here to open in UI" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:15:56,204 [info] run executed, status=completed: {'name': 'data-fetch-data-fetch'}\n" + ] + } + ], + "source": [ + "data_fetch_run = project.run_function(\n", + " function=\"data-fetch\", local=False, returns=[\"train-dataset\", \"test-dataset\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "6d206644", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:15:56,355 [info] Storing function: {'name': 'train-train', 'uid': 'b0b6137768c74af2b115b4399ee596e5', 'db': 'http://mlrun-api:8080'}\n", + "> 2023-05-17 09:15:56,743 [info] Job is running in the background, pod: train-train-vzxw9\n", + "final state: completed\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
my-load-proj-shapira0May 17 09:16:02completedtrain-train
v3io_user=shapira
kind=job
owner=shapira
mlrun/client_version=1.3.1-rc5
mlrun/client_python_version=3.7.6
host=train-train-vzxw9
release=v2
train_data
test_data
accuracy=0.85
f1_score=0.88
precision_score=0.7857142857142857
recall_score=1.0
feature-importance
test_set
confusion-matrix
roc-curves
calibration-curve
model
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/html": [ + " > to track results use the .show() or .logs() methods or click here to open in UI" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:16:18,044 [info] run executed, status=completed: {'name': 'train-train'}\n" + ] + } + ], + "source": [ + "train_run = project.run_function(\n", + " function=\"train\",\n", + " inputs={\n", + " \"train_data\": data_fetch_run.outputs[\"train-dataset\"],\n", + " \"test_data\": data_fetch_run.outputs[\"test-dataset\"],\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "122cd04e", + "metadata": {}, + "source": [ + "## Deploy project functions\n", + "To deploy a remote function e.g. nuclio or serving function, use the {py:class}`~mlrun.projects.MlrunProject.deploy_function` method. 
\n", + "You must use this method before invoking Nuclio or serving functions.\n", + "````\n", + "nuclio_func=project.deploy_function(function='')\n", + "\n", + "nuclio_func.function.invoke('/',{'int':4})\n", + "````" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "89950a6e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:19:25,799 [info] Starting remote function deploy\n", + "2023-05-17 09:19:26 (info) Deploying function\n", + "2023-05-17 09:19:26 (info) Building\n", + "2023-05-17 09:19:26 (info) Staging files and preparing base images\n", + "2023-05-17 09:19:26 (info) Building processor image\n", + "2023-05-17 09:20:41 (info) Build complete\n", + "2023-05-17 09:21:19 (info) Function deploy complete\n", + "> 2023-05-17 09:21:27,112 [info] successfully deployed function: {'internal_invocation_urls': ['nuclio-my-load-proj-shapira-serving-v2.default-tenant.svc.cluster.local:8080'], 'external_invocation_urls': ['my-load-proj-shapira-serving-v2-my-load-proj-shapira.default-tenant.app.cust-cs-il-3-5-2.iguazio-cd2.com/']}\n" + ] + } + ], + "source": [ + "serving_dep = project.deploy_function(\"serving\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "8ea18d9d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:21:27,192 [info] invoking function: {'method': 'POST', 'path': 'http://nuclio-my-load-proj-shapira-serving-v2.default-tenant.svc.cluster.local:8080/'}\n" + ] + }, + { + "data": { + "text/plain": [ + "{'id': 'efb4e274-00c2-428d-b462-92222bc64ce5',\n", + " 'model_name': 'model',\n", + " 'outputs': [1]}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serving_dep.function.invoke(\"/\", my_data)" + ] + }, + { + "cell_type": "markdown", + "id": "2d1786c6", + "metadata": {}, + "source": [ + "## Run the project workflow" + ] + }, + { 
+ "cell_type": "code", + "execution_count": 16, + "id": "1a89c5e6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Pipeline running (id=b6ebe4fd-457e-4992-8eb5-a1b70fc44b94), click here to view the details in MLRun UI
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "kfp\n", + "\n", + "\n", + "\n", + "ci-cd-tutorial-nplzh-1091444859\n", + "\n", + "\n", + "\n", + "\n", + "deploy-serving\n", + "\n", + "\n", + "\n", + "ci-cd-tutorial-nplzh-1597241585\n", + "\n", + "train\n", + "\n", + "\n", + "\n", + "ci-cd-tutorial-nplzh-1597241585->ci-cd-tutorial-nplzh-1091444859\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "ci-cd-tutorial-nplzh-604068056\n", + "\n", + "data-fetch\n", + "\n", + "\n", + "\n", + "ci-cd-tutorial-nplzh-604068056->ci-cd-tutorial-nplzh-1597241585\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "

Run Results

[info] Workflow b6ebe4fd-457e-4992-8eb5-a1b70fc44b94 finished, state=Succeeded


click the hyper links below to see detailed results
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uidstartstatenameparametersresults
May 17 09:22:14completedtrain
accuracy=0.8
f1_score=0.7999999999999999
precision_score=0.7272727272727273
recall_score=0.8888888888888888
May 17 09:21:43completeddata-fetch
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "b6ebe4fd-457e-4992-8eb5-a1b70fc44b94" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# run the workflow named main and wait for the pipeline completion (watch=True)\n", + "project.run(\"main\", watch=True, engine=\"remote:kfp\")" + ] + }, + { + "cell_type": "markdown", + "id": "c300faa6", + "metadata": {}, + "source": [ + "## Run a scheduled workflow\n", + "\n", + "For more information about scheduling workflows, see {ref}`scheduled-jobs`." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "0130d760", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2023-05-17 09:24:14,370 [warning] WARNING!, you seem to have uncommitted git changes, use .push()\n", + "> 2023-05-17 09:24:14,373 [info] executing workflow scheduling 'workflow-runner-main' remotely with kfp engine\n", + "> 2023-05-17 09:24:14,377 [info] Storing function: {'name': 'main', 'uid': 'ff401cc316574c4ea94043ddcbab3a9e', 'db': 'http://mlrun-api:8080'}\n", + "> 2023-05-17 09:24:14,966 [info] task schedule created: {'schedule': '0 * * * *', 'project': 'my-load-proj-shapira', 'name': 'main'}\n" + ] + } + ], + "source": [ + "project.run(\"main\", watch=True, schedule=\"0 * * * *\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/projects/project.md b/docs/projects/project.md index 9de2cdf8e5..d2fddf9824 100644 --- a/docs/projects/project.md +++ 
b/docs/projects/project.md @@ -21,6 +21,7 @@ git-best-practices load-project run-build-deploy build-run-workflows-pipelines +ci-cd-automate ci-integration ../secrets ``` diff --git a/docs/runtimes/create-and-use-functions.ipynb b/docs/runtimes/create-and-use-functions.ipynb index 800becb932..f5a3d3e83f 100644 --- a/docs/runtimes/create-and-use-functions.ipynb +++ b/docs/runtimes/create-and-use-functions.ipynb @@ -155,7 +155,7 @@ "\n", "- **requirements:** Additional Python dependencies needed for the function to run. Using this parameter results in a new Docker image (using the `image` parameter as a base image). This can be a list of Python dependencies or a path to a `requirements.txt` file.\n", "\n", - "- **with_repo:** Whether a function requires additional files or dependencies within a Git repo or archive file. This Git repo or archive file is specified on a project level via `project.set_source(...)`, which the function consumes. If this parameter is omitted, the default is `False`." + "- **with_repo:** Set to `True` if the function requires additional files or dependencies within a Git repo or archive file. This Git repo or archive file is specified on a project level via `project.set_source(...)`, which the function consumes. If this parameter is omitted, the default is `False`." ] }, { diff --git a/docs/runtimes/images.md b/docs/runtimes/images.md index 81d9c2dcc0..7cdf9d89e5 100644 --- a/docs/runtimes/images.md +++ b/docs/runtimes/images.md @@ -10,7 +10,7 @@ Every release of MLRun includes several images for different usages. The build a ## Using images -See [Kubernetes Jobs & Images](./mlrun_jobs.html). +See {ref}`build-function-image`. 
## MLRun images and how to build them diff --git a/docs/secrets.md b/docs/secrets.md index 080519dac9..31b6c1a950 100644 --- a/docs/secrets.md +++ b/docs/secrets.md @@ -1,5 +1,5 @@ (secrets)= -# Working with secrets +# Working with secrets When executing jobs through MLRun, the code might need access to specific secrets, for example to access data residing on a data-store that requires credentials (such as a private S3 bucket), or many other similar needs. diff --git a/docs/store/artifacts.md b/docs/store/artifacts.md index 465cca8d5b..696895f931 100644 --- a/docs/store/artifacts.md +++ b/docs/store/artifacts.md @@ -3,12 +3,6 @@ An artifact is any data that is produced and/or consumed by functions, jobs, or pipelines. -Artifacts metadata is stored in the project's database. The main types of artifacts are: -- **Files** — files, directories, images, figures, and plotlines -- **Datasets** — any data, such as tables and DataFrames -- **Models** — all trained models -- **Feature Store Objects** — Feature sets and feature vectors - **In this section** - [Viewing artifacts](#viewing-artifacts) - [Artifact path](#artifact-path) @@ -16,6 +10,19 @@ Artifacts metadata is stored in the project's database. The main types of artifa - [Artifact URIs, versioning, and metadata](#artifact-uris-versioning-and-metadata) - [See also](#see-also) +## Types of artifacts + +Artifacts metadata is stored in the project's database. 
The main types of artifacts are: +- **Files** — files, directories, images, figures, and plotlines + - "dir": mlrun.artifacts.DirArtifact + - "plot": mlrun.artifacts.PlotArtifact + - "link": mlrun.artifacts.LinkArtifact +- **Models** — all trained models + - "model": mlrun.artifacts.ModelArtifact +- **Feature Store Objects** — Feature sets and feature vectors + - "dataset": mlrun.artifacts.DatasetArtifact + + "plotly": mlrun.artifacts.PlotlyArtifact, ## Viewing artifacts diff --git a/mlrun/__main__.py b/mlrun/__main__.py index ae86118546..c394a517c5 100644 --- a/mlrun/__main__.py +++ b/mlrun/__main__.py @@ -763,7 +763,7 @@ def get(kind, name, selector, namespace, uid, project, tag, db, extra_args): name, project=project, tag=tag, labels=selector ) df = artifacts.to_df()[ - ["tree", "key", "iter", "kind", "path", "hash", "updated"] + ["tree", "key", "iter", "kind", "path", "hash", "updated", "uri"] ] df["tree"] = df["tree"].apply(lambda x: f"..{x[-8:]}") df["hash"] = df["hash"].apply(lambda x: f"..{x[-6:]}") @@ -1089,25 +1089,30 @@ def project( if db: mlconf.dbpath = db - proj = load_project(context, url, name, init_git=init_git, clone=clone, save=save) + # set the CLI/GIT parameters in load_project() so they can be used by project setup scripts + parameters = fill_params(param) if param else {} + if git_repo: + parameters["git_repo"] = git_repo + if git_issue: + parameters["git_issue"] = git_issue + commit = environ.get("GITHUB_SHA") or environ.get("CI_COMMIT_SHA") + if commit and not parameters.get("commit_id"): + parameters["commit_id"] = commit + + proj = load_project( + context, + url, + name, + init_git=init_git, + clone=clone, + save=save, + parameters=parameters, + ) url_str = " from " + url if url else "" print(f"Loading project {proj.name}{url_str} into {context}:\n") if is_relative_path(artifact_path): artifact_path = path.abspath(artifact_path) - if param: - proj.spec.params = fill_params(param, proj.spec.params) - if git_repo: - 
proj.spec.params["git_repo"] = git_repo - if git_issue: - proj.spec.params["git_issue"] = git_issue - commit = ( - proj.get_param("commit_id") - or environ.get("GITHUB_SHA") - or environ.get("CI_COMMIT_SHA") - ) - if commit: - proj.spec.params["commit_id"] = commit if secrets: secrets = line2keylist(secrets, "kind", "source") secret_store = SecretsStore.from_list(secrets) diff --git a/mlrun/api/api/endpoints/frontend_spec.py b/mlrun/api/api/endpoints/frontend_spec.py index 6711a17673..46427932de 100644 --- a/mlrun/api/api/endpoints/frontend_spec.py +++ b/mlrun/api/api/endpoints/frontend_spec.py @@ -20,6 +20,7 @@ import mlrun.api.api.deps import mlrun.api.utils.builder import mlrun.api.utils.clients.iguazio +import mlrun.api.utils.runtimes.nuclio import mlrun.common.schemas import mlrun.runtimes import mlrun.runtimes.utils @@ -117,7 +118,7 @@ def _resolve_feature_flags() -> mlrun.common.schemas.FeatureFlags: nuclio_streams = mlrun.common.schemas.NuclioStreamsFeatureFlag.disabled if mlrun.mlconf.get_parsed_igz_version() and semver.VersionInfo.parse( - mlrun.runtimes.utils.resolve_nuclio_version() + mlrun.api.utils.runtimes.nuclio.resolve_nuclio_version() ) >= semver.VersionInfo.parse("1.7.8"): nuclio_streams = mlrun.common.schemas.NuclioStreamsFeatureFlag.enabled diff --git a/mlrun/api/api/endpoints/functions.py b/mlrun/api/api/endpoints/functions.py index 69d255c969..028016be97 100644 --- a/mlrun/api/api/endpoints/functions.py +++ b/mlrun/api/api/endpoints/functions.py @@ -448,21 +448,21 @@ async def build_status( def _handle_job_deploy_status( - db_session, - fn, - name, - project, - tag, - offset, - logs, + db_session: Session, + fn: dict, + name: str, + project: str, + tag: str, + offset: int, + logs: bool, ): # job deploy status - state = get_in(fn, "status.state", "") + function_state = get_in(fn, "status.state", "") pod = get_in(fn, "status.build_pod", "") image = get_in(fn, "spec.build.image", "") out = b"" if not pod: - if state == 
mlrun.common.schemas.FunctionState.ready: + if function_state == mlrun.common.schemas.FunctionState.ready: # when the function has been built we set the created image into the `spec.image` for reference see at the # end of the function where we resolve if the status is ready and then set the spec.build.image to # spec.image @@ -474,17 +474,19 @@ def _handle_job_deploy_status( content=out, media_type="text/plain", headers={ - "function_status": state, + "function_status": function_state, "function_image": image, "builder_pod": pod, }, ) # read from log file - terminal_states = ["failed", "error", "ready"] log_file = log_path(project, f"build_{name}__{tag or 'latest'}") - if state in terminal_states and log_file.exists(): - if state == mlrun.common.schemas.FunctionState.ready: + if ( + function_state in mlrun.common.schemas.FunctionState.terminal_states() + and log_file.exists() + ): + if function_state == mlrun.common.schemas.FunctionState.ready: # when the function has been built we set the created image into the `spec.image` for reference see at the # end of the function where we resolve if the status is ready and then set the spec.build.image to # spec.image @@ -500,40 +502,65 @@ def _handle_job_deploy_status( content=out, media_type="text/plain", headers={ - "x-mlrun-function-status": state, - "function_status": state, + "x-mlrun-function-status": function_state, + "function_status": function_state, "function_image": image, "builder_pod": pod, }, ) - # TODO: change state to pod_status - state = mlrun.api.utils.singletons.k8s.get_k8s_helper(silent=False).get_pod_status( - pod + build_pod_state = mlrun.api.utils.singletons.k8s.get_k8s_helper( + silent=False + ).get_pod_status(pod) + logger.debug( + "Resolved pod status", + function_name=name, + pod_status=build_pod_state, + pod_name=pod, ) - logger.info("Resolved pod status", pod_status=state, pod_name=pod) - if state == "succeeded": - logger.info("Build completed successfully") - state = 
mlrun.common.schemas.FunctionState.ready - if state in ["failed", "error"]: - logger.error("Build failed", pod_name=pod, pod_status=state) - state = mlrun.common.schemas.FunctionState.error + normalized_pod_function_state = ( + mlrun.common.schemas.FunctionState.get_function_state_from_pod_state( + build_pod_state + ) + ) + if normalized_pod_function_state == mlrun.common.schemas.FunctionState.ready: + logger.info( + "Build completed successfully", + function_name=name, + pod=pod, + pod_state=build_pod_state, + ) + elif normalized_pod_function_state == mlrun.common.schemas.FunctionState.error: + logger.error( + "Build failed", function_name=name, pod_name=pod, pod_status=build_pod_state + ) - if (logs and state != "pending") or state in terminal_states: + if ( + ( + logs + and normalized_pod_function_state + != mlrun.common.schemas.FunctionState.pending + ) + or normalized_pod_function_state + in mlrun.common.schemas.FunctionState.terminal_states() + ): try: resp = mlrun.api.utils.singletons.k8s.get_k8s_helper(silent=False).logs(pod) except ApiException as exc: logger.warning( "Failed to get build logs", function_name=name, - function_state=state, + function_state=normalized_pod_function_state, pod=pod, exc_info=exc, ) resp = "" - if state in terminal_states: + if ( + normalized_pod_function_state + in mlrun.common.schemas.FunctionState.terminal_states() + ): # TODO: move to log collector log_file.parent.mkdir(parents=True, exist_ok=True) with log_file.open("wb") as fp: @@ -543,28 +570,31 @@ def _handle_job_deploy_status( # begin from the offset number and then encode out = resp[offset:].encode() - update_in(fn, "status.state", state) - if state == mlrun.common.schemas.FunctionState.ready: - update_in(fn, "spec.image", image) + # check if the previous function state is different from the current build pod state, if that is the case then + # update the function and store to the database + if function_state != normalized_pod_function_state: + update_in(fn, 
"status.state", normalized_pod_function_state) - versioned = False - if state == mlrun.common.schemas.FunctionState.ready: - versioned = True - mlrun.api.crud.Functions().store_function( - db_session, - fn, - name, - project, - tag, - versioned=versioned, - ) + versioned = False + if normalized_pod_function_state == mlrun.common.schemas.FunctionState.ready: + update_in(fn, "spec.image", image) + versioned = True + + mlrun.api.crud.Functions().store_function( + db_session, + fn, + name, + project, + tag, + versioned=versioned, + ) return Response( content=out, media_type="text/plain", headers={ - "x-mlrun-function-status": state, - "function_status": state, + "x-mlrun-function-status": normalized_pod_function_state, + "function_status": normalized_pod_function_state, "function_image": image, "builder_pod": pod, }, diff --git a/mlrun/api/api/utils.py b/mlrun/api/api/utils.py index c458af3d59..0eb73eab44 100644 --- a/mlrun/api/api/utils.py +++ b/mlrun/api/api/utils.py @@ -28,6 +28,7 @@ from sqlalchemy.orm import Session import mlrun.api.crud +import mlrun.api.db.base import mlrun.api.utils.auth.verifier import mlrun.api.utils.clients.iguazio import mlrun.api.utils.singletons.k8s @@ -245,14 +246,43 @@ def mask_notification_params_with_secret( return notification_object -def unmask_notification_params_secret_on_task(run): +def unmask_notification_params_secret_on_task( + db: mlrun.api.db.base.DBInterface, + db_session: Session, + run: typing.Union[dict, mlrun.model.RunObject], +): if isinstance(run, dict): run = mlrun.model.RunObject.from_dict(run) - run.spec.notifications = [ - unmask_notification_params_secret(run.metadata.project, notification) - for notification in run.spec.notifications - ] + notifications = [] + for notification in run.spec.notifications: + invalid_notifications = [] + try: + notifications.append( + unmask_notification_params_secret(run.metadata.project, notification) + ) + except Exception as exc: + logger.warning( + "Failed to unmask 
notification params, notification will not be sent", + project=run.metadata.project, + run_uid=run.metadata.uid, + notification=notification.name, + exc=err_to_str(exc), + ) + # set error status in order to later save the db + notification.status = mlrun.common.schemas.NotificationStatus.ERROR + invalid_notifications.append(notification) + + if invalid_notifications: + db.store_run_notifications( + db_session, + invalid_notifications, + run.metadata.uid, + run.metadata.project, + ) + + run.spec.notifications = notifications + return run diff --git a/mlrun/api/crud/client_spec.py b/mlrun/api/crud/client_spec.py index 16d0e6faec..6f3131a195 100644 --- a/mlrun/api/crud/client_spec.py +++ b/mlrun/api/crud/client_spec.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import mlrun.api.utils.runtimes.nuclio import mlrun.common.schemas import mlrun.utils.singleton from mlrun.config import Config, config, default_config -from mlrun.runtimes.utils import resolve_mpijob_crd_version, resolve_nuclio_version +from mlrun.runtimes.utils import resolve_mpijob_crd_version class ClientSpec( @@ -44,7 +45,7 @@ def get_client_spec( config.dask_kfp_image, client_version, client_python_version ), api_url=config.httpdb.api_url, - nuclio_version=resolve_nuclio_version(), + nuclio_version=mlrun.api.utils.runtimes.nuclio.resolve_nuclio_version(), spark_operator_version=config.spark_operator_version, calculate_artifact_hash=config.artifacts.calculate_hash, generate_artifact_target_path_from_artifact_hash=config.artifacts.generate_target_path_from_artifact_hash, diff --git a/mlrun/api/crud/feature_store.py b/mlrun/api/crud/feature_store.py index f4eedb5037..55e222f749 100644 --- a/mlrun/api/crud/feature_store.py +++ b/mlrun/api/crud/feature_store.py @@ -35,6 +35,9 @@ def create_feature_set( feature_set: mlrun.common.schemas.FeatureSet, versioned: bool = True, ) -> str: + if not feature_set.spec.engine: + 
feature_set.spec.engine = "storey" + return self._create_object( db_session, project, @@ -52,6 +55,9 @@ def store_feature_set( uid: typing.Optional[str] = None, versioned: bool = True, ) -> str: + if not feature_set.spec.engine: + feature_set.spec.engine = "storey" + return self._store_object( db_session, project, diff --git a/mlrun/api/crud/pipelines.py b/mlrun/api/crud/pipelines.py index 60d62069e1..acc5e2ea1f 100644 --- a/mlrun/api/crud/pipelines.py +++ b/mlrun/api/crud/pipelines.py @@ -59,12 +59,8 @@ def list_pipelines( raise mlrun.errors.MLRunInvalidArgumentError( "Summary format is not supported for list pipelines, use get instead" ) - kfp_url = mlrun.mlconf.resolve_kfp_url(namespace) - if not kfp_url: - raise mlrun.errors.MLRunNotFoundError( - "KubeFlow Pipelines is not configured" - ) - kfp_client = kfp.Client(host=kfp_url) + + kfp_client = self.initialize_kfp_client(namespace) if project != "*": run_dicts = [] while page_token is not None: @@ -102,6 +98,36 @@ def list_pipelines( return total_size, next_page_token, runs + def delete_pipelines_runs(self, db_session: sqlalchemy.orm.Session, project: str): + _, _, project_pipeline_runs = self.list_pipelines( + db_session=db_session, + project=project, + format_=mlrun.common.schemas.PipelinesFormat.metadata_only, + ) + kfp_client = self.initialize_kfp_client() + + if project_pipeline_runs: + logger.debug( + "Detected pipeline runs for project, deleting them", + project=project, + pipeline_run_ids=[run["id"] for run in project_pipeline_runs], + ) + + for pipeline_run in project_pipeline_runs: + try: + # delete pipeline run also terminates it if it is in progress + kfp_client._run_api.delete_run(pipeline_run["id"]) + except Exception as exc: + # we don't want to fail the entire delete operation if we failed to delete a single pipeline run + # so it won't fail the delete project operation. 
we will log the error and continue + logger.warning( + "Failed to delete pipeline run", + project=project, + pipeline_run_id=pipeline_run["id"], + exc_info=exc, + ) + logger.debug("Finished deleting pipeline runs", project=project) + def get_pipeline( self, db_session: sqlalchemy.orm.Session, @@ -110,12 +136,7 @@ def get_pipeline( namespace: typing.Optional[str] = None, format_: mlrun.common.schemas.PipelinesFormat = mlrun.common.schemas.PipelinesFormat.summary, ): - kfp_url = mlrun.mlconf.resolve_kfp_url(namespace) - if not kfp_url: - raise mlrun.errors.MLRunBadRequestError( - "KubeFlow Pipelines is not configured" - ) - kfp_client = kfp.Client(host=kfp_url) + kfp_client = self.initialize_kfp_client(namespace) run = None try: api_run_detail = kfp_client.get_run(run_id) @@ -176,12 +197,7 @@ def create_pipeline( ) try: - kfp_url = mlrun.mlconf.resolve_kfp_url(namespace) - if not kfp_url: - raise mlrun.errors.MLRunBadRequestError( - "KubeFlow Pipelines is not configured" - ) - kfp_client = kfp.Client(host=kfp_url) + kfp_client = self.initialize_kfp_client(namespace) experiment = kfp_client.create_experiment(name=experiment_name) run = kfp_client.run_pipeline( experiment.id, run_name, pipeline_file.name, params=arguments @@ -200,6 +216,15 @@ def create_pipeline( return run + @staticmethod + def initialize_kfp_client(namespace: typing.Optional[str] = None) -> kfp.Client: + kfp_url = mlrun.mlconf.resolve_kfp_url(namespace) + if not kfp_url: + raise mlrun.errors.MLRunNotFoundError( + "KubeFlow Pipelines is not configured" + ) + return kfp.Client(host=kfp_url) + def _format_runs( self, db_session: sqlalchemy.orm.Session, @@ -222,22 +247,8 @@ def _format_run( if format_ == mlrun.common.schemas.PipelinesFormat.full: return run elif format_ == mlrun.common.schemas.PipelinesFormat.metadata_only: - return { - k: str(v) if v is not None else v - for k, v in run.items() - if k - in [ - "id", - "name", - "project", - "status", - "error", - "created_at", - "scheduled_at", - 
"finished_at", - "description", - ] - } + return mlrun.utils.helpers.format_run(run, with_project=True) + elif format_ == mlrun.common.schemas.PipelinesFormat.name_only: return run.get("name") elif format_ == mlrun.common.schemas.PipelinesFormat.summary: diff --git a/mlrun/api/crud/projects.py b/mlrun/api/crud/projects.py index 6f552df133..38907c1456 100644 --- a/mlrun/api/crud/projects.py +++ b/mlrun/api/crud/projects.py @@ -24,7 +24,7 @@ import mlrun.api.crud import mlrun.api.db.session import mlrun.api.utils.events.events_factory as events_factory -import mlrun.api.utils.projects.remotes.follower +import mlrun.api.utils.projects.remotes.follower as project_follower import mlrun.api.utils.singletons.db import mlrun.api.utils.singletons.k8s import mlrun.api.utils.singletons.scheduler @@ -35,7 +35,7 @@ class Projects( - mlrun.api.utils.projects.remotes.follower.Member, + project_follower.Member, metaclass=mlrun.utils.singleton.AbstractSingleton, ): def __init__(self) -> None: @@ -159,6 +159,11 @@ def delete_project_resources( label_selector=f"mlrun/project={name}", force=True, ) + if mlrun.mlconf.resolve_kfp_url(): + logger.debug("Removing KFP pipelines runs for project", project=name) + mlrun.api.crud.pipelines.Pipelines().delete_pipelines_runs( + db_session=session, project=name + ) # log collector service will delete the logs, so we don't need to do it here if ( @@ -180,14 +185,26 @@ def delete_project_resources( secrets = None ( secret_name, - _, + action, ) = mlrun.api.utils.singletons.k8s.get_k8s_helper().delete_project_secrets( name, secrets ) - events_client = events_factory.EventsFactory().get_events_client() - events_client.emit( - events_client.generate_project_secret_deleted_event(name, secret_name) - ) + if action: + events_client = events_factory.EventsFactory().get_events_client() + events_client.emit( + events_client.generate_project_secret_event( + name, + secret_name, + action=action, + ) + ) + + else: + logger.debug( + "No project secrets to 
delete", + action=action, + secret_name=secret_name, + ) def get_project( self, session: sqlalchemy.orm.Session, name: str diff --git a/mlrun/api/crud/runtimes/nuclio/function.py b/mlrun/api/crud/runtimes/nuclio/function.py index f26b1fa5eb..0301fcb00d 100644 --- a/mlrun/api/crud/runtimes/nuclio/function.py +++ b/mlrun/api/crud/runtimes/nuclio/function.py @@ -424,6 +424,11 @@ def _set_misc_specs(function, nuclio_spec): nuclio_spec.set_config( "spec.readinessTimeoutSeconds", function.spec.readiness_timeout ) + if function.spec.readiness_timeout_before_failure: + nuclio_spec.set_config( + "spec.waitReadinessTimeoutBeforeFailure", + function.spec.readiness_timeout_before_failure, + ) if function.spec.resources: nuclio_spec.set_config("spec.resources", function.spec.resources) diff --git a/mlrun/api/crud/runtimes/nuclio/helpers.py b/mlrun/api/crud/runtimes/nuclio/helpers.py index 6ddd33bef0..e0eca3806f 100644 --- a/mlrun/api/crud/runtimes/nuclio/helpers.py +++ b/mlrun/api/crud/runtimes/nuclio/helpers.py @@ -17,6 +17,8 @@ import semver import mlrun +import mlrun.api.utils.clients.nuclio +import mlrun.api.utils.runtimes.nuclio import mlrun.api.utils.singletons.k8s import mlrun.runtimes from mlrun.utils import logger @@ -190,7 +192,9 @@ def is_nuclio_version_in_range(min_version: str, max_version: str) -> bool: try: parsed_min_version = semver.VersionInfo.parse(min_version) parsed_max_version = semver.VersionInfo.parse(max_version) - resolved_nuclio_version = mlrun.runtimes.utils.resolve_nuclio_version() + resolved_nuclio_version = ( + mlrun.api.utils.runtimes.nuclio.resolve_nuclio_version() + ) parsed_current_version = semver.VersionInfo.parse(resolved_nuclio_version) except ValueError: logger.warning( diff --git a/mlrun/api/crud/secrets.py b/mlrun/api/crud/secrets.py index 58d973941b..1f61da5610 100644 --- a/mlrun/api/crud/secrets.py +++ b/mlrun/api/crud/secrets.py @@ -107,22 +107,22 @@ def store_project_secrets( if mlrun.api.utils.singletons.k8s.get_k8s_helper(): ( 
secret_name, - created, + action, ) = mlrun.api.utils.singletons.k8s.get_k8s_helper().store_project_secrets( project, secrets_to_store ) secret_keys = [secret_name for secret_name in secrets_to_store.keys()] - events_client = events_factory.EventsFactory().get_events_client() - event = events_client.generate_project_secret_event( - project=project, - secret_name=secret_name, - secret_keys=secret_keys, - action=mlrun.common.schemas.SecretEventActions.created - if created - else mlrun.common.schemas.SecretEventActions.updated, - ) - events_client.emit(event) + if action: + events_client = events_factory.EventsFactory().get_events_client() + event = events_client.generate_project_secret_event( + project=project, + secret_name=secret_name, + secret_keys=secret_keys, + action=action, + ) + events_client.emit(event) + else: raise mlrun.errors.MLRunInternalServerError( "K8s provider cannot be initialized" @@ -161,20 +161,19 @@ def store_auth_secret( ) ( auth_secret_name, - created, + action, ) = mlrun.api.utils.singletons.k8s.get_k8s_helper().store_auth_secret( secret.username, secret.access_key ) - events_client = events_factory.EventsFactory().get_events_client() - event = events_client.generate_project_auth_secret_event( - username=secret.username, - secret_name=auth_secret_name, - action=mlrun.common.schemas.SecretEventActions.created - if created - else mlrun.common.schemas.SecretEventActions.updated, - ) - events_client.emit(event) + if action: + events_client = events_factory.EventsFactory().get_events_client() + event = events_client.generate_auth_secret_event( + username=secret.username, + secret_name=auth_secret_name, + action=action, + ) + events_client.emit(event) return auth_secret_name @@ -226,21 +225,20 @@ def delete_project_secrets( if mlrun.api.utils.singletons.k8s.get_k8s_helper(): ( secret_name, - deleted, + action, ) = mlrun.api.utils.singletons.k8s.get_k8s_helper().delete_project_secrets( project, secrets ) - events_client = 
events_factory.EventsFactory().get_events_client() - event = events_client.generate_project_secret_event( - project=project, - secret_name=secret_name, - secret_keys=secrets, - action=mlrun.common.schemas.SecretEventActions.deleted - if deleted - else mlrun.common.schemas.SecretEventActions.updated, - ) - events_client.emit(event) + if action: + events_client = events_factory.EventsFactory().get_events_client() + event = events_client.generate_project_secret_event( + project=project, + secret_name=secret_name, + secret_keys=secrets, + action=action, + ) + events_client.emit(event) else: raise mlrun.errors.MLRunInternalServerError( diff --git a/mlrun/api/db/init_db.py b/mlrun/api/db/init_db.py index 20faa0ef97..6022efc21e 100644 --- a/mlrun/api/db/init_db.py +++ b/mlrun/api/db/init_db.py @@ -15,9 +15,7 @@ from mlrun.api.db.sqldb.models import Base from mlrun.api.db.sqldb.session import get_engine -from mlrun.config import config def init_db() -> None: - if config.httpdb.db_type != "filedb": - Base.metadata.create_all(bind=get_engine()) + Base.metadata.create_all(bind=get_engine()) diff --git a/mlrun/api/db/sqldb/db.py b/mlrun/api/db/sqldb/db.py index 7a6224c498..0b25c6e5d2 100644 --- a/mlrun/api/db/sqldb/db.py +++ b/mlrun/api/db/sqldb/db.py @@ -82,7 +82,6 @@ ) NULL = None # Avoid flake8 issuing warnings when comparing in filter -run_time_fmt = "%Y-%m-%dT%H:%M:%S.%fZ" unversioned_tagged_object_uid_prefix = "unversioned-" conflict_messages = [ @@ -2293,7 +2292,6 @@ def _update_feature_set_spec( feature_set_spec = new_feature_set_dict.get("spec") features = feature_set_spec.pop("features", []) entities = feature_set_spec.pop("entities", []) - self._update_feature_set_features(feature_set, features) self._update_feature_set_entities(feature_set, entities) @@ -2458,7 +2456,6 @@ def create_feature_set( ) db_feature_set = FeatureSet(project=project) - self._update_db_record_from_object_dict(db_feature_set, feature_set_dict, uid) 
self._update_feature_set_spec(db_feature_set, feature_set_dict) diff --git a/mlrun/api/db/sqldb/models/models_mysql.py b/mlrun/api/db/sqldb/models/models_mysql.py index c0cf2cf6da..3bd511ff6a 100644 --- a/mlrun/api/db/sqldb/models/models_mysql.py +++ b/mlrun/api/db/sqldb/models/models_mysql.py @@ -38,7 +38,6 @@ Base = declarative_base() NULL = None # Avoid flake8 issuing warnings when comparing in filter -run_time_fmt = "%Y-%m-%dT%H:%M:%S.%fZ" def make_label(table): diff --git a/mlrun/api/db/sqldb/models/models_sqlite.py b/mlrun/api/db/sqldb/models/models_sqlite.py index 88ae6607bb..c9a445cbf8 100644 --- a/mlrun/api/db/sqldb/models/models_sqlite.py +++ b/mlrun/api/db/sqldb/models/models_sqlite.py @@ -39,7 +39,6 @@ Base = declarative_base() NULL = None # Avoid flake8 issuing warnings when comparing in filter -run_time_fmt = "%Y-%m-%dT%H:%M:%S.%fZ" def make_label(table): diff --git a/mlrun/api/launcher.py b/mlrun/api/launcher.py index cd316a0abb..8b7aba6d0b 100644 --- a/mlrun/api/launcher.py +++ b/mlrun/api/launcher.py @@ -18,7 +18,7 @@ import mlrun.common.schemas.schedule import mlrun.config import mlrun.execution -import mlrun.launcher.base +import mlrun.launcher.base as launcher import mlrun.runtimes import mlrun.runtimes.generators import mlrun.runtimes.utils @@ -26,7 +26,7 @@ import mlrun.utils.regex -class ServerSideLauncher(mlrun.launcher.base.BaseLauncher): +class ServerSideLauncher(launcher.BaseLauncher): def launch( self, runtime: mlrun.runtimes.BaseRuntime, diff --git a/mlrun/api/main.py b/mlrun/api/main.py index ce1da49342..1688b2b260 100644 --- a/mlrun/api/main.py +++ b/mlrun/api/main.py @@ -607,7 +607,9 @@ def _push_terminal_run_notifications(db: mlrun.api.db.base.DBInterface, db_sessi # Unmasking the run parameters from secrets before handing them over to the notification handler # as importing the `Secrets` crud in the notification handler will cause a circular import unmasked_runs = [ - 
mlrun.api.api.utils.unmask_notification_params_secret_on_task(run) + mlrun.api.api.utils.unmask_notification_params_secret_on_task( + db, db_session, run + ) for run in runs ] diff --git a/mlrun/api/utils/clients/iguazio.py b/mlrun/api/utils/clients/iguazio.py index 77c58840da..513fc2b3fd 100644 --- a/mlrun/api/utils/clients/iguazio.py +++ b/mlrun/api/utils/clients/iguazio.py @@ -28,7 +28,7 @@ import requests.adapters from fastapi.concurrency import run_in_threadpool -import mlrun.api.utils.projects.remotes.leader +import mlrun.api.utils.projects.remotes.leader as project_leader import mlrun.common.schemas import mlrun.errors import mlrun.utils.helpers @@ -73,7 +73,7 @@ def all(): class Client( - mlrun.api.utils.projects.remotes.leader.Member, + project_leader.Member, metaclass=mlrun.utils.singleton.AbstractSingleton, ): def __init__(self, *args, **kwargs) -> None: @@ -93,37 +93,6 @@ def __init__(self, *args, **kwargs) -> None: self._logger = logger.get_child("iguazio-client") self._igz_clients = {} - def try_get_grafana_service_url(self, session: str) -> typing.Optional[str]: - """ - Try to find a ready grafana app service, and return its URL - If nothing found, returns None - """ - self._logger.debug("Getting grafana service url from Iguazio") - response = self._send_request_to_api( - "GET", - "app_services_manifests", - "Failed getting app services manifests from Iguazio", - session, - ) - response_body = response.json() - for app_services_manifest in response_body.get("data", []): - for app_service in app_services_manifest.get("attributes", {}).get( - "app_services", [] - ): - if ( - app_service.get("spec", {}).get("kind") == "grafana" - and app_service.get("status", {}).get("state") == "ready" - and len(app_service.get("status", {}).get("urls", [])) > 0 - ): - url_kind_to_url = {} - for url in app_service["status"]["urls"]: - url_kind_to_url[url["kind"]] = url["url"] - # precedence for https - for kind in ["https", "http"]: - if kind in url_kind_to_url: - 
return url_kind_to_url[kind] - return None - def verify_request_session( self, request: fastapi.Request ) -> mlrun.common.schemas.AuthInfo: @@ -195,10 +164,14 @@ def create_project( project: mlrun.common.schemas.Project, wait_for_completion: bool = True, ) -> bool: - self._logger.debug("Creating project in Iguazio", project=project) + self._logger.debug("Creating project in Iguazio", project=project.metadata.name) body = self._transform_mlrun_project_to_iguazio_project(project) return self._create_project_in_iguazio( - session, project.metadata.name, body, wait_for_completion + session, + project.metadata.name, + body, + wait_for_completion, + timeout=60, ) def update_project( @@ -322,9 +295,38 @@ def is_sync(self): """ return True - def emit_manual_event( - self, access_key: str, event: igz_mgmt.schemas.manual_events.ManualEventSchema - ): + def try_get_grafana_service_url(self, session: str) -> typing.Optional[str]: + """ + Try to find a ready grafana app service, and return its URL + If nothing found, returns None + """ + self._logger.debug("Getting grafana service url from Iguazio") + response = self._send_request_to_api( + "GET", + "app_services_manifests", + "Failed getting app services manifests from Iguazio", + session, + ) + response_body = response.json() + for app_services_manifest in response_body.get("data", []): + for app_service in app_services_manifest.get("attributes", {}).get( + "app_services", [] + ): + if ( + app_service.get("spec", {}).get("kind") == "grafana" + and app_service.get("status", {}).get("state") == "ready" + and len(app_service.get("status", {}).get("urls", [])) > 0 + ): + url_kind_to_url = {} + for url in app_service["status"]["urls"]: + url_kind_to_url[url["kind"]] = url["url"] + # precedence for https + for kind in ["https", "http"]: + if kind in url_kind_to_url: + return url_kind_to_url[kind] + return None + + def emit_manual_event(self, access_key: str, event: igz_mgmt.Event): """ Emit a manual event to Iguazio """ @@ -394,9 
+396,14 @@ def _find_latest_updated_at( return latest_updated_at def _create_project_in_iguazio( - self, session: str, name: str, body: dict, wait_for_completion: bool + self, + session: str, + name: str, + body: dict, + wait_for_completion: bool, + **kwargs, ) -> bool: - _, job_id = self._post_project_to_iguazio(session, body) + _, job_id = self._post_project_to_iguazio(session, body, **kwargs) if wait_for_completion: self._logger.debug( "Waiting for project creation job in Iguazio", @@ -415,10 +422,18 @@ def _create_project_in_iguazio( return True def _post_project_to_iguazio( - self, session: str, body: dict + self, + session: str, + body: dict, + **kwargs, ) -> typing.Tuple[mlrun.common.schemas.Project, str]: response = self._send_request_to_api( - "POST", "projects", "Failed creating project in Iguazio", session, json=body + "POST", + "projects", + "Failed creating project in Iguazio", + session, + json=body, + **kwargs, ) response_body = response.json() return ( @@ -427,7 +442,11 @@ def _post_project_to_iguazio( ) def _put_project_to_iguazio( - self, session: str, name: str, body: dict + self, + session: str, + name: str, + body: dict, + **kwargs, ) -> mlrun.common.schemas.Project: response = self._send_request_to_api( "PUT", @@ -435,6 +454,7 @@ def _put_project_to_iguazio( "Failed updating project in Iguazio", session, json=body, + **kwargs, ) return self._transform_iguazio_project_to_mlrun_project(response.json()["data"]) @@ -743,6 +763,9 @@ def _handle_error_response( self, method, path, response, response_body, error_message, kwargs ): log_kwargs = copy.deepcopy(kwargs) + + # this can be big and spammy + log_kwargs.pop("json", None) log_kwargs.update({"method": method, "path": path}) try: ctx = response_body.get("meta", {}).get("ctx") diff --git a/mlrun/api/utils/clients/nuclio.py b/mlrun/api/utils/clients/nuclio.py index 9b42dac5c6..b6f170223d 100644 --- a/mlrun/api/utils/clients/nuclio.py +++ b/mlrun/api/utils/clients/nuclio.py @@ -20,7 +20,7 @@ import 
requests.adapters import sqlalchemy.orm -import mlrun.api.utils.projects.remotes.follower +import mlrun.api.utils.projects.remotes.follower as project_follower import mlrun.common.schemas import mlrun.errors import mlrun.utils.singleton @@ -28,7 +28,7 @@ class Client( - mlrun.api.utils.projects.remotes.follower.Member, + project_follower.Member, metaclass=mlrun.utils.singleton.AbstractSingleton, ): def __init__(self) -> None: diff --git a/mlrun/api/utils/events/base.py b/mlrun/api/utils/events/base.py index a8ce25ade5..15e18c1896 100644 --- a/mlrun/api/utils/events/base.py +++ b/mlrun/api/utils/events/base.py @@ -23,14 +23,14 @@ class BaseEventClient: def emit(self, event): pass - def generate_project_auth_secret_event( + def generate_auth_secret_event( self, username: str, secret_name: str, action: mlrun.common.schemas.AuthSecretEventActions, ): """ - Generate a project auth secret event + Generate an auth secret event :param username: username :param secret_name: secret name :param action: preformed action @@ -38,18 +38,6 @@ def generate_project_auth_secret_event( """ pass - @abc.abstractmethod - def generate_project_auth_secret_created_event( - self, username: str, secret_name: str - ): - pass - - @abc.abstractmethod - def generate_project_auth_secret_updated_event( - self, username: str, secret_name: str - ): - pass - @abc.abstractmethod def generate_project_secret_event( self, @@ -67,19 +55,3 @@ def generate_project_secret_event( :return: event object to emit """ pass - - @abc.abstractmethod - def generate_project_secret_created_event( - self, project: str, secret_name: str, secret_keys: typing.List[str] - ): - pass - - @abc.abstractmethod - def generate_project_secret_updated_event( - self, project: str, secret_name: str, secret_keys: typing.List[str] - ): - pass - - @abc.abstractmethod - def generate_project_secret_deleted_event(self, project: str, secret_name: str): - pass diff --git a/mlrun/api/utils/events/iguazio.py b/mlrun/api/utils/events/iguazio.py 
index cdf3dd0808..9c41e74478 100644 --- a/mlrun/api/utils/events/iguazio.py +++ b/mlrun/api/utils/events/iguazio.py @@ -14,7 +14,7 @@ # import typing -import igz_mgmt.schemas.manual_events +import igz_mgmt.schemas.events import mlrun.api.utils.clients.iguazio import mlrun.api.utils.events.base @@ -38,7 +38,7 @@ def __init__(self, access_key: str = None, verbose: bool = None): self.verbose = verbose if verbose is not None else mlrun.mlconf.events.verbose self.source = "mlrun-api" - def emit(self, event: igz_mgmt.schemas.manual_events.ManualEventSchema): + def emit(self, event: igz_mgmt.Event): try: logger.debug("Emitting event", event=event) mlrun.api.utils.clients.iguazio.Client().emit_manual_event( @@ -52,63 +52,33 @@ def emit(self, event: igz_mgmt.schemas.manual_events.ManualEventSchema): exc_info=exc, ) - def generate_project_auth_secret_event( + def generate_auth_secret_event( self, username: str, secret_name: str, action: mlrun.common.schemas.AuthSecretEventActions, - ) -> igz_mgmt.schemas.manual_events.ManualEventSchema: + ) -> igz_mgmt.Event: """ - Generate a project auth secret event + Generate an auth secret event :param username: username :param secret_name: secret name :param action: preformed action :return: event object to emit """ if action == mlrun.common.schemas.SecretEventActions.created: - return self.generate_project_auth_secret_created_event( - username, secret_name - ) + return self._generate_auth_secret_created_event(username, secret_name) elif action == mlrun.common.schemas.SecretEventActions.updated: - return self.generate_project_auth_secret_updated_event( - username, secret_name - ) + return self._generate_auth_secret_updated_event(username, secret_name) else: raise mlrun.errors.MLRunInvalidArgumentError(f"Unsupported action {action}") - def generate_project_auth_secret_created_event( - self, username: str, secret_name: str - ) -> igz_mgmt.schemas.manual_events.ManualEventSchema: - return igz_mgmt.schemas.manual_events.ManualEventSchema( - 
source=self.source, - kind=PROJECT_AUTH_SECRET_CREATED, - description=f"User {username} created secret {secret_name}", - severity=igz_mgmt.constants.EventSeverity.info, - classification=igz_mgmt.constants.EventClassification.security, - system_event=False, - visibility=igz_mgmt.constants.EventVisibility.external, - ) - - def generate_project_auth_secret_updated_event( - self, username: str, secret_name: str - ) -> igz_mgmt.schemas.manual_events.ManualEventSchema: - return igz_mgmt.schemas.manual_events.ManualEventSchema( - source=self.source, - kind=PROJECT_AUTH_SECRET_UPDATED, - description=f"User {username} updated secret {secret_name}", - severity=igz_mgmt.constants.EventSeverity.info, - classification=igz_mgmt.constants.EventClassification.security, - system_event=False, - visibility=igz_mgmt.constants.EventVisibility.external, - ) - def generate_project_secret_event( self, project: str, secret_name: str, secret_keys: typing.List[str] = None, action: mlrun.common.schemas.SecretEventActions = mlrun.common.schemas.SecretEventActions.created, - ) -> igz_mgmt.schemas.manual_events.ManualEventSchema: + ) -> igz_mgmt.Event: """ Generate a project secret event :param project: project name @@ -118,56 +88,124 @@ def generate_project_secret_event( :return: event object to emit """ if action == mlrun.common.schemas.SecretEventActions.created: - return self.generate_project_secret_created_event( + return self._generate_project_secret_created_event( project, secret_name, secret_keys ) elif action == mlrun.common.schemas.SecretEventActions.updated: - return self.generate_project_secret_updated_event( + return self._generate_project_secret_updated_event( project, secret_name, secret_keys ) elif action == mlrun.common.schemas.SecretEventActions.deleted: - return self.generate_project_secret_deleted_event(project, secret_name) + return self._generate_project_secret_deleted_event(project, secret_name) else: raise mlrun.errors.MLRunInvalidArgumentError(f"Unsupported action 
{action}") - def generate_project_secret_created_event( + def _generate_auth_secret_created_event( + self, username: str, secret_name: str + ) -> igz_mgmt.Event: + return igz_mgmt.Event( + source=self.source, + kind=PROJECT_AUTH_SECRET_CREATED, + description=f"User {username} created secret {secret_name}", + parameters_text=[ + igz_mgmt.schemas.events.ParametersText(name="username", value=username), + igz_mgmt.schemas.events.ParametersText( + name="secret_name", value=secret_name + ), + ], + severity=igz_mgmt.constants.EventSeverity.info, + classification=igz_mgmt.constants.EventClassification.security, + system_event=False, + visibility=igz_mgmt.constants.EventVisibility.external, + ) + + def _generate_auth_secret_updated_event( + self, username: str, secret_name: str + ) -> igz_mgmt.Event: + return igz_mgmt.Event( + source=self.source, + kind=PROJECT_AUTH_SECRET_UPDATED, + description=f"User {username} updated secret {secret_name}", + parameters_text=[ + igz_mgmt.schemas.events.ParametersText(name="username", value=username), + igz_mgmt.schemas.events.ParametersText( + name="secret_name", value=secret_name + ), + ], + severity=igz_mgmt.constants.EventSeverity.info, + classification=igz_mgmt.constants.EventClassification.security, + system_event=False, + visibility=igz_mgmt.constants.EventVisibility.external, + ) + + def _generate_project_secret_created_event( self, project: str, secret_name: str, secret_keys: typing.List[str] - ) -> igz_mgmt.schemas.manual_events.ManualEventSchema: + ) -> igz_mgmt.Event: normalized_secret_keys = self._list_to_string(secret_keys) - return igz_mgmt.schemas.manual_events.ManualEventSchema( + return igz_mgmt.Event( source=self.source, kind=PROJECT_SECRET_CREATED, - description=f"Created project secret {secret_name} with secret keys {normalized_secret_keys}" - f" for project {project}", + parameters_text=[ + igz_mgmt.schemas.events.ParametersText( + name="project_name", value=project + ), + igz_mgmt.schemas.events.ParametersText( + 
name="secret_name", value=secret_name + ), + igz_mgmt.schemas.events.ParametersText( + name="secret_keys", value=normalized_secret_keys + ), + ], + description=f"Project {project} secret created", severity=igz_mgmt.constants.EventSeverity.info, classification=igz_mgmt.constants.EventClassification.security, system_event=False, visibility=igz_mgmt.constants.EventVisibility.external, ) - def generate_project_secret_updated_event( + def _generate_project_secret_updated_event( self, project: str, secret_name: str, secret_keys: typing.List[str], - ) -> igz_mgmt.schemas.manual_events.ManualEventSchema: + ) -> igz_mgmt.Event: normalized_secret_keys = self._list_to_string(secret_keys) - return igz_mgmt.schemas.manual_events.ManualEventSchema( + return igz_mgmt.Event( source=self.source, kind=PROJECT_SECRET_UPDATED, - description=f"Updated secret keys {normalized_secret_keys} of project secret {secret_name} " - f"for project {project}", + description=f"Project {project} secret updated", + parameters_text=[ + igz_mgmt.schemas.events.ParametersText( + name="project_name", value=project + ), + igz_mgmt.schemas.events.ParametersText( + name="secret_name", value=secret_name + ), + igz_mgmt.schemas.events.ParametersText( + name="secret_keys", value=normalized_secret_keys + ), + ], severity=igz_mgmt.constants.EventSeverity.info, classification=igz_mgmt.constants.EventClassification.security, system_event=False, visibility=igz_mgmt.constants.EventVisibility.external, ) - def generate_project_secret_deleted_event(self, project: str, secret_name: str): - return igz_mgmt.schemas.manual_events.ManualEventSchema( + def _generate_project_secret_deleted_event( + self, project: str, secret_name: str + ) -> igz_mgmt.Event: + return igz_mgmt.Event( source=self.source, kind=PROJECT_SECRET_DELETED, - description=f"Deleted project secret {secret_name} for project {project}", + description=f"Project {project} secret deleted", + parameters_text=[ + igz_mgmt.schemas.events.ParametersText( + 
name="project_name", value=project + ), + igz_mgmt.schemas.events.ParametersText( + name="secret_name", value=secret_name + ), + ], severity=igz_mgmt.constants.EventSeverity.info, classification=igz_mgmt.constants.EventClassification.security, system_event=False, diff --git a/mlrun/api/utils/events/nop.py b/mlrun/api/utils/events/nop.py index 181583628e..f48a6da09b 100644 --- a/mlrun/api/utils/events/nop.py +++ b/mlrun/api/utils/events/nop.py @@ -22,14 +22,14 @@ class NopClient(mlrun.api.utils.events.base.BaseEventClient): def emit(self, event): return - def generate_project_auth_secret_event( + def generate_auth_secret_event( self, username: str, secret_name: str, action: mlrun.common.schemas.AuthSecretEventActions, ): """ - Generate a project auth secret event + Generate an auth secret event :param username: username :param secret_name: secret name :param action: preformed action @@ -37,16 +37,6 @@ def generate_project_auth_secret_event( """ return - def generate_project_auth_secret_created_event( - self, username: str, secret_name: str - ): - return - - def generate_project_auth_secret_updated_event( - self, username: str, secret_name: str - ): - return - def generate_project_secret_event( self, project: str, @@ -62,16 +52,4 @@ def generate_project_secret_event( :param action: preformed action :return: event object to emit """ - - def generate_project_secret_created_event( - self, project: str, secret_name: str, secret_keys: typing.List[str] - ): - return - - def generate_project_secret_updated_event( - self, project: str, secret_name: str, secret_keys: typing.List[str] - ): - return - - def generate_project_secret_deleted_event(self, project: str, secret_name: str): - return + pass diff --git a/mlrun/api/utils/periodic.py b/mlrun/api/utils/periodic.py index 43d7ca875b..d38aec6e91 100644 --- a/mlrun/api/utils/periodic.py +++ b/mlrun/api/utils/periodic.py @@ -27,7 +27,9 @@ # This module is different from mlrun.db.periodic in that this module's functions aren't 
supposed to persist # also this module supports asyncio while the other currently not # TODO: merge the modules -async def _periodic_function_wrapper(interval: int, function, *args, **kwargs): +async def _periodic_function_wrapper( + interval: typing.Union[int, float], function, *args, **kwargs +): while True: try: if asyncio.iscoroutinefunction(function): @@ -45,7 +47,12 @@ async def _periodic_function_wrapper(interval: int, function, *args, **kwargs): def run_function_periodically( - interval: int, name: str, replace: bool, function, *args, **kwargs + interval: typing.Union[float, int], + name: str, + replace: bool, + function, + *args, + **kwargs ): global tasks logger.debug("Submitting function to run periodically", name=name) diff --git a/mlrun/api/utils/projects/follower.py b/mlrun/api/utils/projects/follower.py index 0c9e552867..52bae5aaf8 100644 --- a/mlrun/api/utils/projects/follower.py +++ b/mlrun/api/utils/projects/follower.py @@ -27,7 +27,7 @@ import mlrun.api.utils.clients.iguazio import mlrun.api.utils.clients.nuclio import mlrun.api.utils.periodic -import mlrun.api.utils.projects.member +import mlrun.api.utils.projects.member as project_member import mlrun.api.utils.projects.remotes.leader import mlrun.api.utils.projects.remotes.nop_leader import mlrun.common.schemas @@ -42,7 +42,7 @@ class Member( - mlrun.api.utils.projects.member.Member, + project_member.Member, metaclass=mlrun.utils.singleton.AbstractSingleton, ): def initialize(self): diff --git a/mlrun/api/utils/projects/leader.py b/mlrun/api/utils/projects/leader.py index 38d2fb7e54..e7e11901e1 100644 --- a/mlrun/api/utils/projects/leader.py +++ b/mlrun/api/utils/projects/leader.py @@ -23,6 +23,7 @@ import mlrun.api.utils.clients.nuclio import mlrun.api.utils.periodic import mlrun.api.utils.projects.member +import mlrun.api.utils.projects.member as project_member import mlrun.api.utils.projects.remotes.follower import mlrun.api.utils.projects.remotes.nop_follower import mlrun.common.schemas @@ 
-37,7 +38,7 @@ class Member( - mlrun.api.utils.projects.member.Member, + project_member.Member, metaclass=mlrun.utils.singleton.AbstractSingleton, ): def initialize(self): diff --git a/mlrun/api/utils/projects/remotes/nop_follower.py b/mlrun/api/utils/projects/remotes/nop_follower.py index 7b970a1a7e..8ebf9202a1 100644 --- a/mlrun/api/utils/projects/remotes/nop_follower.py +++ b/mlrun/api/utils/projects/remotes/nop_follower.py @@ -18,12 +18,12 @@ import sqlalchemy.orm import mlrun.api.utils.helpers -import mlrun.api.utils.projects.remotes.follower +import mlrun.api.utils.projects.remotes.follower as project_follower import mlrun.common.schemas import mlrun.errors -class Member(mlrun.api.utils.projects.remotes.follower.Member): +class Member(project_follower.Member): def __init__(self) -> None: super().__init__() self._projects: typing.Dict[str, mlrun.common.schemas.Project] = {} diff --git a/mlrun/api/utils/projects/remotes/nop_leader.py b/mlrun/api/utils/projects/remotes/nop_leader.py index 43f5b65e62..cdd5ef0227 100644 --- a/mlrun/api/utils/projects/remotes/nop_leader.py +++ b/mlrun/api/utils/projects/remotes/nop_leader.py @@ -15,13 +15,13 @@ import datetime import typing -import mlrun.api.utils.projects.remotes.leader +import mlrun.api.utils.projects.remotes.leader as project_leader import mlrun.api.utils.singletons.project_member import mlrun.common.schemas import mlrun.errors -class Member(mlrun.api.utils.projects.remotes.leader.Member): +class Member(project_leader.Member): def __init__(self) -> None: super().__init__() self.db_session = None diff --git a/mlrun/api/utils/runtimes/__init__.py b/mlrun/api/utils/runtimes/__init__.py new file mode 100644 index 0000000000..33c5b3d3bd --- /dev/null +++ b/mlrun/api/utils/runtimes/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/mlrun/api/utils/runtimes/nuclio.py b/mlrun/api/utils/runtimes/nuclio.py new file mode 100644 index 0000000000..8f9cceabb3 --- /dev/null +++ b/mlrun/api/utils/runtimes/nuclio.py @@ -0,0 +1,43 @@ +# Copyright 2023 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import mlrun.api.utils.clients.nuclio +from mlrun.config import config +from mlrun.errors import err_to_str +from mlrun.utils import logger + +cached_nuclio_version = None + + +# if nuclio version specified on mlrun config set it likewise, +# if not specified, get it from nuclio api client +# since this is a heavy operation (sending requests to API), and it's unlikely that the version +# will change - cache it (this means if we upgrade nuclio, we need to restart mlrun to re-fetch the new version) +def resolve_nuclio_version(): + global cached_nuclio_version + + if not cached_nuclio_version: + + # config override everything + nuclio_version = config.nuclio_version + if not nuclio_version and config.nuclio_dashboard_url: + try: + nuclio_client = mlrun.api.utils.clients.nuclio.Client() + nuclio_version = nuclio_client.get_dashboard_version() + except Exception as exc: + logger.warning("Failed to resolve nuclio version", exc=err_to_str(exc)) + + cached_nuclio_version = nuclio_version + + return cached_nuclio_version diff --git a/mlrun/api/utils/singletons/k8s.py b/mlrun/api/utils/singletons/k8s.py index ea04c9b1ec..753acff4e9 100644 --- a/mlrun/api/utils/singletons/k8s.py +++ b/mlrun/api/utils/singletons/k8s.py @@ -298,10 +298,12 @@ def get_auth_secret_name(self, access_key: str) -> str: def _hash_access_key(access_key: str): return hashlib.sha224(access_key.encode()).hexdigest() - def store_project_secrets(self, project, secrets, namespace="") -> (str, bool): + def store_project_secrets( + self, project, secrets, namespace="" + ) -> (str, typing.Optional[mlrun.common.schemas.SecretEventActions]): secret_name = self.get_project_secret_name(project) - created = self.store_secrets(secret_name, secrets, namespace) - return secret_name, created + action = self.store_secrets(secret_name, secrets, namespace) + return secret_name, action def read_auth_secret(self, secret_name, namespace="", raise_on_not_found=False): namespace = self.resolve_namespace(namespace) @@ 
-341,10 +343,10 @@ def _get_secret_value(key): def store_auth_secret( self, username: str, access_key: str, namespace="" - ) -> (str, bool): + ) -> (str, typing.Optional[mlrun.common.schemas.SecretEventActions]): """ Store the given access key as a secret in the cluster. The secret name is generated from the access key - :return: returns the secret name and a boolean indicating whether the secret was created or updated + :return: returns the secret name and the action taken against the secret """ secret_name = self.get_auth_secret_name(access_key) secret_data = { @@ -355,14 +357,14 @@ def store_auth_secret( "access_key" ): access_key, } - created = self.store_secrets( + action = self.store_secrets( secret_name, secret_data, namespace, type_=SecretTypes.v3io_fuse, labels={"mlrun/username": username}, ) - return secret_name, created + return secret_name, action def store_secrets( self, @@ -371,10 +373,15 @@ def store_secrets( namespace="", type_=SecretTypes.opaque, labels: typing.Optional[dict] = None, - ) -> bool: + ) -> typing.Optional[mlrun.common.schemas.SecretEventActions]: """ Store secrets in a kubernetes secret object - :return: returns True if the secret was created, False if it already existed and required an update + :param secret_name: the project secret name + :param secrets: the secrets to delete + :param namespace: k8s namespace + :param type_: k8s secret type + :param labels: k8s labels for the secret + :return: returns the action if the secret was created or updated, None if nothing changed """ namespace = self.resolve_namespace(namespace) try: @@ -392,7 +399,7 @@ def store_secrets( ) k8s_secret.string_data = secrets self.v1api.create_namespaced_secret(namespace, k8s_secret) - return True + return mlrun.common.schemas.SecretEventActions.created secret_data = k8s_secret.data.copy() for key, value in secrets.items(): @@ -400,7 +407,7 @@ def store_secrets( k8s_secret.data = secret_data self.v1api.replace_namespaced_secret(secret_name, namespace, 
k8s_secret) - return False + return mlrun.common.schemas.SecretEventActions.updated def load_secret(self, secret_name, namespace=""): namespace = namespace or self.resolve_namespace(namespace) @@ -412,51 +419,60 @@ def load_secret(self, secret_name, namespace=""): return k8s_secret.data - def delete_project_secrets(self, project, secrets, namespace="") -> (str, bool): + def delete_project_secrets( + self, project, secrets, namespace="" + ) -> (str, typing.Optional[mlrun.common.schemas.SecretEventActions]): """ Delete secrets from a kubernetes secret object - :return: returns the secret name and a boolean indicating whether the secret was deleted + :return: returns the secret name and the action taken against the secret """ secret_name = self.get_project_secret_name(project) - deleted = self.delete_secrets(secret_name, secrets, namespace) - return secret_name, deleted + action = self.delete_secrets(secret_name, secrets, namespace) + return secret_name, action def delete_auth_secret(self, secret_ref: str, namespace=""): self.delete_secrets(secret_ref, {}, namespace) - def delete_secrets(self, secret_name, secrets, namespace="") -> bool: + def delete_secrets( + self, secret_name, secrets, namespace="" + ) -> typing.Optional[mlrun.common.schemas.SecretEventActions]: """ Delete secrets from a kubernetes secret object - :return: returns True if the secret was deleted, False if it still exists and only deleted part of the keys + :param secret_name: the project secret name + :param secrets: the secrets to delete + :param namespace: k8s namespace + :return: returns the action if the secret was deleted or updated, None if nothing changed """ namespace = self.resolve_namespace(namespace) try: k8s_secret = self.v1api.read_namespaced_secret(secret_name, namespace) except ApiException as exc: - # If secret does not exist, return as if the deletion was successfully if exc.status == 404: - return + logger.info( + "Project secret does not exist, nothing to delete.", + 
secret_name=secret_name, + ) + return None else: logger.error( f"failed to retrieve k8s secret: {mlrun.errors.err_to_str(exc)}" ) raise exc - if not secrets: - secret_data = {} - else: + secret_data = {} + if secrets: secret_data = k8s_secret.data.copy() for secret in secrets: secret_data.pop(secret, None) - if not secret_data: - self.v1api.delete_namespaced_secret(secret_name, namespace) - return True - else: + if secret_data: k8s_secret.data = secret_data self.v1api.replace_namespaced_secret(secret_name, namespace, k8s_secret) - return False + return mlrun.common.schemas.SecretEventActions.updated + + self.v1api.delete_namespaced_secret(secret_name, namespace) + return mlrun.common.schemas.SecretEventActions.deleted def _get_project_secrets_raw_data(self, project, namespace=""): secret_name = self.get_project_secret_name(project) diff --git a/mlrun/api/utils/singletons/project_member.py b/mlrun/api/utils/singletons/project_member.py index 9422326fcb..3989198c1e 100644 --- a/mlrun/api/utils/singletons/project_member.py +++ b/mlrun/api/utils/singletons/project_member.py @@ -12,13 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. # + +import typing + import mlrun.api.utils.projects.follower import mlrun.api.utils.projects.leader import mlrun.api.utils.projects.member import mlrun.config # TODO: something nicer -project_member: mlrun.api.utils.projects.member.Member = None +project_member: typing.Optional[mlrun.api.utils.projects.member.Member] = None def initialize_project_member(): diff --git a/mlrun/artifacts/base.py b/mlrun/artifacts/base.py index 390abd596e..7f2ac94c74 100644 --- a/mlrun/artifacts/base.py +++ b/mlrun/artifacts/base.py @@ -1027,7 +1027,10 @@ def generate_target_path(item: Artifact, artifact_path, producer): suffix = "/" if not item.is_dir: - suffix = os.path.splitext(item.src_path or "")[1] + + # suffixes yields a list of suffixes, e.g. 
['.tar', '.gz'] + # join them together to get the full suffix, e.g. '.tar.gz' + suffix = "".join(pathlib.Path(item.src_path or "").suffixes) if not suffix and item.format: suffix = f".{item.format}" diff --git a/mlrun/common/schemas/__init__.py b/mlrun/common/schemas/__init__.py index fca503ec3a..c27042cab2 100644 --- a/mlrun/common/schemas/__init__.py +++ b/mlrun/common/schemas/__init__.py @@ -136,6 +136,7 @@ ProjectSummariesOutput, ProjectSummary, ) +from .regex import RegexMatchModes from .runs import RunIdentifier from .runtime_resource import ( GroupedByJobRuntimeResourcesOutput, diff --git a/mlrun/common/schemas/auth.py b/mlrun/common/schemas/auth.py index 4d31450f79..c193cc04c1 100644 --- a/mlrun/common/schemas/auth.py +++ b/mlrun/common/schemas/auth.py @@ -86,7 +86,9 @@ def to_resource_string( AuthorizationResourceTypes.model_endpoint: "/projects/{project_name}/model-endpoints/{resource_name}", AuthorizationResourceTypes.pipeline: "/projects/{project_name}/pipelines/{resource_name}", # Hub sources are not project-scoped, and auth is globally on the sources endpoint. - AuthorizationResourceTypes.hub_source: "/hub/sources", + # TODO - this was reverted to /marketplace since MLRun needs to be able to run with old igz versions. Once + # we only have support for igz versions that support /hub (>=3.5.4), change this to "/hub/sources". 
+ AuthorizationResourceTypes.hub_source: "/marketplace/sources", }[self].format(project_name=project_name, resource_name=resource_name) diff --git a/mlrun/common/schemas/feature_store.py b/mlrun/common/schemas/feature_store.py index 7c8b474e83..9249b2a24b 100644 --- a/mlrun/common/schemas/feature_store.py +++ b/mlrun/common/schemas/feature_store.py @@ -48,6 +48,7 @@ class Config: class FeatureSetSpec(ObjectSpec): entities: List[Entity] = [] features: List[Feature] = [] + engine: Optional[str] = Field(default="storey") class FeatureSet(BaseModel): diff --git a/mlrun/common/schemas/function.py b/mlrun/common/schemas/function.py index 92f6ccb81f..0e7a0c89ba 100644 --- a/mlrun/common/schemas/function.py +++ b/mlrun/common/schemas/function.py @@ -45,6 +45,23 @@ class FunctionState: # same goes for the build which is not coming from the pod, but is used and we can't just omit it for BC reasons build = "build" + @classmethod + def get_function_state_from_pod_state(cls, pod_state: str): + if pod_state == "succeeded": + return cls.ready + if pod_state in ["failed", "error"]: + return cls.error + if pod_state in ["running", "pending"]: + return getattr(cls, pod_state) + return cls.unknown + + @classmethod + def terminal_states(cls): + return [ + cls.ready, + cls.error, + ] + class PreemptionModes(mlrun.common.types.StrEnum): # makes function pods be able to run on preemptible nodes diff --git a/mlrun/common/schemas/notification.py b/mlrun/common/schemas/notification.py index 6f0e36fddf..a16d8b26f6 100644 --- a/mlrun/common/schemas/notification.py +++ b/mlrun/common/schemas/notification.py @@ -42,12 +42,12 @@ class NotificationStatus(mlrun.common.types.StrEnum): class Notification(pydantic.BaseModel): - kind: NotificationKind = None - name: str = None - message: str = None - severity: NotificationSeverity = None - when: typing.List[str] = None - condition: str = None + kind: NotificationKind + name: str + message: str + severity: NotificationSeverity + when: typing.List[str] 
+ condition: str params: typing.Dict[str, typing.Any] = None status: NotificationStatus = None sent_time: typing.Union[str, datetime.datetime] = None diff --git a/mlrun/common/schemas/regex.py b/mlrun/common/schemas/regex.py new file mode 100644 index 0000000000..fb817e33fa --- /dev/null +++ b/mlrun/common/schemas/regex.py @@ -0,0 +1,24 @@ +# Copyright 2018 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import mlrun.common.types + + +class RegexMatchModes(mlrun.common.types.StrEnum): + """Regex match modes""" + + # all regexes must match + all = "all" + # any of the regexes must match + any = "any" diff --git a/mlrun/config.py b/mlrun/config.py index 3e8ef4a8f5..9e36a0bc3f 100644 --- a/mlrun/config.py +++ b/mlrun/config.py @@ -559,7 +559,7 @@ # supported modes "enabled", "disabled". # "enabled" - events are emitted. # "disabled" - a nop client is used (aka doing nothing). 
- "mode": "enabled", + "mode": "disabled", "verbose": False, # used for igz client when emitting events "access_key": "", diff --git a/mlrun/datastore/__init__.py b/mlrun/datastore/__init__.py index 8a75579afc..af772ce467 100644 --- a/mlrun/datastore/__init__.py +++ b/mlrun/datastore/__init__.py @@ -29,8 +29,12 @@ "StreamSource", "KafkaSource", "RedisStore", + "DatabricksFileSystemDisableCache", + "DatabricksFileBugFixed", ] +import fsspec + import mlrun.datastore.wasbfs from ..platforms.iguazio import ( @@ -42,6 +46,7 @@ from ..utils import logger from .base import DataItem from .datastore import StoreManager, in_memory_store, uri_to_ipython +from .dbfs_store import DatabricksFileBugFixed, DatabricksFileSystemDisableCache from .s3 import parse_s3_bucket_and_key from .sources import ( BigQuerySource, @@ -62,6 +67,22 @@ store_manager = StoreManager() +if hasattr(fsspec, "register_implementation"): + fsspec.register_implementation( + "dbfs", DatabricksFileSystemDisableCache, clobber=True + ) +else: + from fsspec.registry import known_implementations + + known_implementations["dbfs"] = { + "class": "mlrun.datastore.dbfs_store.DatabricksFileSystemDisableCache", + "err": "Please make sure your fsspec version supports dbfs", + } + + del known_implementations + +del fsspec # clear the module namespace + def set_in_memory_item(key, value): item = store_manager.object(f"memory://{key}") diff --git a/mlrun/datastore/base.py b/mlrun/datastore/base.py index b84b41c877..4406d9d563 100644 --- a/mlrun/datastore/base.py +++ b/mlrun/datastore/base.py @@ -261,7 +261,7 @@ def reader(*args, **kwargs): updated_args = [f"{base_path}/{filename}"] updated_args.extend(args[1:]) dfs.append(df_module.read_csv(*updated_args, **kwargs)) - return pd.concat(dfs) + return df_module.concat(dfs) elif ( file_url.endswith(".parquet") diff --git a/mlrun/datastore/datastore.py b/mlrun/datastore/datastore.py index 80c2aab59d..0f4d884161 100644 --- a/mlrun/datastore/datastore.py +++ 
b/mlrun/datastore/datastore.py @@ -86,6 +86,10 @@ def schema_to_store(schema): "Google cloud storage packages are missing, use pip install mlrun[google-cloud-storage]" ) return GoogleCloudStorageStore + elif schema == "dbfs": + from .dbfs_store import DBFSStore + + return DBFSStore else: raise ValueError(f"unsupported store scheme ({schema})") @@ -175,6 +179,11 @@ def object( ) store, subpath = self.get_or_create_store(url, secrets=secrets) + schema, endpoint, parsed_url = parse_url(url) + # TODO: Modify the URL replacement to be outside of the dataitem. Dataitem class should + # be implemented as a generic class. + if endpoint and schema == "dbfs": + url = url.replace(endpoint, "", 1) return DataItem(key, store, subpath, url, meta=meta, artifact_url=artifact_url) def get_or_create_store(self, url, secrets: dict = None) -> (DataStore, str): diff --git a/mlrun/datastore/dbfs_store.py b/mlrun/datastore/dbfs_store.py new file mode 100644 index 0000000000..17f61d6283 --- /dev/null +++ b/mlrun/datastore/dbfs_store.py @@ -0,0 +1,168 @@ +# Copyright 2023 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pathlib + +import fsspec +from fsspec.implementations.dbfs import DatabricksFile, DatabricksFileSystem + +import mlrun.errors + +from .base import DataStore, FileStats + + +class DatabricksFileBugFixed(DatabricksFile): + """Overrides DatabricksFile to add the following fix: https://github.com/fsspec/filesystem_spec/pull/1278""" + + def _upload_chunk(self, final=False): + """Internal function to add a chunk of data to a started upload""" + self.buffer.seek(0) + data = self.buffer.getvalue() + + data_chunks = [ + data[start:end] for start, end in self._to_sized_blocks(end=len(data)) + ] + + for data_chunk in data_chunks: + self.fs._add_data(handle=self.handle, data=data_chunk) + + if final: + self.fs._close_handle(handle=self.handle) + return True + + def _fetch_range(self, start, end): + """Internal function to download a block of data""" + return_buffer = b"" + for chunk_start, chunk_end in self._to_sized_blocks(start, end): + return_buffer += self.fs._get_data( + path=self.path, start=chunk_start, end=chunk_end + ) + + return return_buffer + + def _to_sized_blocks(self, start=0, end=100): + """Helper function to split a range from 0 to total_length into blocksizes""" + for data_chunk in range(start, end, self.blocksize): + data_start = data_chunk + data_end = min(end, data_chunk + self.blocksize) + yield data_start, data_end + + +class DatabricksFileSystemDisableCache(DatabricksFileSystem): + root_marker = "/" + protocol = "dbfs" + + def _open(self, path, mode="rb", block_size="default", **kwargs): + """ + Overwrite the base class method to make sure to create a DBFile. + All arguments are copied from the base method. + + Only the default blocksize is allowed. + """ + return DatabricksFileBugFixed( + self, path, mode=mode, block_size=block_size, **kwargs + ) + + # _ls_from_cache is not working properly, so we disable it. 
+ def _ls_from_cache(self, path): + pass + + +# dbfs objects will be represented with the following URL: dbfs:// +class DBFSStore(DataStore): + def __init__(self, parent, schema, name, endpoint="", secrets: dict = None): + super().__init__(parent, name, schema, endpoint, secrets=secrets) + self.get_filesystem(silent=False) + + def get_filesystem(self, silent=True): + """return fsspec file system object, if supported""" + if not self._filesystem: + self._filesystem = fsspec.filesystem("dbfs", **self.get_storage_options()) + return self._filesystem + + def get_storage_options(self): + return dict( + token=self._get_secret_or_env("DATABRICKS_TOKEN"), instance=self.endpoint + ) + + def _verify_filesystem_and_key(self, key: str): + if not self._filesystem: + raise mlrun.errors.MLRunInvalidArgumentError( + "Performing actions on data-item without a valid filesystem" + ) + if not key.startswith("/"): + raise mlrun.errors.MLRunInvalidArgumentError( + "Invalid key parameter - key must start with '/'" + ) + + def get(self, key: str, size=None, offset=0) -> bytes: + self._verify_filesystem_and_key(key) + if size is not None and size <= 0: + raise mlrun.errors.MLRunInvalidArgumentError( + "size cannot be negative or zero" + ) + start = offset or None + end = offset + size if size is not None else None + return self._filesystem.cat_file(key, start=start, end=end) + + def put(self, key, data, append=False): + + self._verify_filesystem_and_key(key) + if append: + raise mlrun.errors.MLRunInvalidArgumentError( + "Append mode not supported for Databricks file system" + ) + # can not use append mode because it overrides data. 
+ mode = "w" + if isinstance(data, bytes): + mode += "b" + elif not isinstance(data, str): + raise TypeError(f"Unknown data type {type(data)}") + with self._filesystem.open(key, mode) as f: + f.write(data) + + def upload(self, key: str, src_path: str): + self._verify_filesystem_and_key(key) + self._filesystem.put_file(src_path, key, overwrite=True) + + def stat(self, key: str): + self._verify_filesystem_and_key(key) + file = self._filesystem.stat(key) + if file["type"] == "file": + size = file["size"] + elif file["type"] == "directory": + raise FileNotFoundError("Operation expects a file not a directory!") + return FileStats(size, None) + + def listdir(self, key: str): + """ + Basic ls of file/dir - without recursion. + """ + self._verify_filesystem_and_key(key) + if self._filesystem.isfile(key): + return key + remote_path = f"{key}/*" + files = self._filesystem.glob(remote_path) + # Get only the files and directories under key path, without the key path itself. + # for example in a filesystem that has this path: /test_mlrun_dbfs_objects/test.txt + # listdir with the input /test_mlrun_dbfs_objects as a key will return ['test.txt']. + files = [pathlib.Path(file).name for file in files if "/" in file] + return files + + def rm(self, path, recursive=False, maxdepth=None): + if maxdepth: + raise mlrun.errors.MLRunInvalidArgumentError( + "dbfs file system does not support maxdepth option in rm function" + ) + self.get_filesystem().rm(path=path, recursive=recursive) diff --git a/mlrun/datastore/helpers.py b/mlrun/datastore/helpers.py new file mode 100644 index 0000000000..09accf8613 --- /dev/null +++ b/mlrun/datastore/helpers.py @@ -0,0 +1,18 @@ +# Copyright 2023 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +ONE_GB = 1024 * 1024 * 1024 +ONE_MB = 1024 * 1024 diff --git a/mlrun/datastore/sources.py b/mlrun/datastore/sources.py index 14d278fb58..e4bb6ce8bf 100644 --- a/mlrun/datastore/sources.py +++ b/mlrun/datastore/sources.py @@ -427,6 +427,7 @@ def __init__( end_time=None, gcp_project: str = None, spark_options: dict = None, + **kwargs, ): if query and table: raise mlrun.errors.MLRunInvalidArgumentError( @@ -459,6 +460,7 @@ def __init__( schedule=schedule, start_time=start_time, end_time=end_time, + **kwargs, ) def _get_credentials_string(self): @@ -631,6 +633,7 @@ def __init__( database: str = None, schema: str = None, warehouse: str = None, + **kwargs, ): attrs = { "query": query, @@ -649,6 +652,7 @@ def __init__( schedule=schedule, start_time=start_time, end_time=end_time, + **kwargs, ) def _get_password(self): @@ -965,6 +969,7 @@ def __init__( spark_options: dict = None, time_fields: List[str] = None, parse_dates: List[str] = None, + **kwargs, ): """ Reads SqlDB as input source for a flow. 
@@ -1024,6 +1029,7 @@ def __init__( schedule=schedule, start_time=start_time, end_time=end_time, + **kwargs, ) def to_dataframe( diff --git a/mlrun/datastore/targets.py b/mlrun/datastore/targets.py index a7fff47cbe..d4c9f500a0 100644 --- a/mlrun/datastore/targets.py +++ b/mlrun/datastore/targets.py @@ -82,11 +82,12 @@ def default_target_names(): return [target.strip() for target in targets.split(",")] -def get_default_targets(): +def get_default_targets(offline_only=False): """initialize the default feature set targets list""" return [ DataTargetBase(target, name=str(target), partitioned=(target == "parquet")) for target in default_target_names() + if not offline_only or not target == "nosql" ] diff --git a/mlrun/datastore/utils.py b/mlrun/datastore/utils.py index 88b7e34447..9d7baba4cf 100644 --- a/mlrun/datastore/utils.py +++ b/mlrun/datastore/utils.py @@ -96,7 +96,7 @@ def filter_df_start_end_time( if isinstance(df, pd.DataFrame): return _execute_time_filter(df, time_column, start_time, end_time) else: - filter_df_generator(df, time_column, start_time, end_time) + return filter_df_generator(df, time_column, start_time, end_time) def filter_df_generator( diff --git a/mlrun/datastore/v3io.py b/mlrun/datastore/v3io.py index 1ecf61460a..5f300351c3 100644 --- a/mlrun/datastore/v3io.py +++ b/mlrun/datastore/v3io.py @@ -22,6 +22,7 @@ import v3io.dataplane import mlrun +from mlrun.datastore.helpers import ONE_GB, ONE_MB from ..platforms.iguazio import parse_path, split_path from .base import ( @@ -36,8 +37,6 @@ ) V3IO_LOCAL_ROOT = "v3io" -ONE_GB = 1024 * 1024 * 1024 -ONE_MB = 1024 * 1024 class V3ioStore(DataStore): diff --git a/mlrun/db/httpdb.py b/mlrun/db/httpdb.py index a6d431a0af..f1d88b1ab3 100644 --- a/mlrun/db/httpdb.py +++ b/mlrun/db/httpdb.py @@ -27,7 +27,6 @@ import semver import mlrun -import mlrun.api.utils.helpers import mlrun.common.schemas import mlrun.model_monitoring.model_endpoint import mlrun.projects @@ -737,8 +736,9 @@ def list_artifacts( # Show 
artifacts with label filters - both uploaded and of binary type result_labels = db.list_artifacts('results', tag='*', project='iris', labels=['uploaded', 'type=binary']) - :param name: Name of artifacts to retrieve. Name is used as a like query, and is not case-sensitive. This means - that querying for ``name`` may return artifacts named ``my_Name_1`` or ``surname``. + :param name: Name of artifacts to retrieve. Name with '~' prefix is used as a like query, and is not + case-sensitive. This means that querying for ``~name`` may return artifacts named + ``my_Name_1`` or ``surname``. :param project: Project name. :param tag: Return artifacts assigned this tag. :param labels: Return artifacts that have these labels. Labels can either be a dictionary {"label": "value"} or diff --git a/mlrun/feature_store/api.py b/mlrun/feature_store/api.py index 9cab99e467..ece2079b4d 100644 --- a/mlrun/feature_store/api.py +++ b/mlrun/feature_store/api.py @@ -62,7 +62,7 @@ run_ingestion_job, run_spark_graph, ) -from .retrieval import get_merger, init_feature_vector_graph, run_merge_job +from .retrieval import get_merger, run_merge_job _v3iofs = None spark_transform_handler = "transform" @@ -228,6 +228,7 @@ def get_online_feature_service( fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow, impute_policy: dict = None, update_stats: bool = False, + entity_keys: List[str] = None, ) -> OnlineVectorService: """initialize and return online feature vector service api, returns :py:class:`~mlrun.feature_store.OnlineVectorService` @@ -247,14 +248,15 @@ def get_online_feature_service( Example with imputing:: - with get_online_feature_service(vector_uri, impute_policy={"*": "$mean", "amount": 0)) as svc: + with get_online_feature_service(vector_uri, entity_keys=['id'], + impute_policy={"*": "$mean", "amount": 0)) as svc: resp = svc.get([{"id": "C123487"}]) 2. as simple function, note that in that option you need to close the session. 
Example:: - svc = get_online_feature_service(vector_uri) + svc = get_online_feature_service(vector_uri, entity_keys=['ticker']) try: resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}]) print(resp) @@ -266,7 +268,8 @@ def get_online_feature_service( Example with imputing:: - svc = get_online_feature_service(vector_uri, impute_policy={"*": "$mean", "amount": 0)) + svc = get_online_feature_service(vector_uri, entity_keys=['id'], + impute_policy={"*": "$mean", "amount": 0)) try: resp = svc.get([{"id": "C123487"}]) except Exception as e: @@ -274,15 +277,21 @@ def get_online_feature_service( finally: svc.close() - :param feature_vector: feature vector uri or FeatureVector object. passing feature vector obj requires update - permissions - :param run_config: function and/or run configuration for remote jobs/services - :param impute_policy: a dict with `impute_policy` per feature, the dict key is the feature name and the dict - value indicate which value will be used in case the feature is NaN/empty, the replaced - value can be fixed number for constants or $mean, $max, $min, $std, $count for statistical - values. "*" is used to specify the default for all features, example: `{"*": "$mean"}` - :param fixed_window_type: determines how to query the fixed window values which were previously inserted by ingest - :param update_stats: update features statistics from the requested feature sets on the vector. Default: False. + :param feature_vector: feature vector uri or FeatureVector object. passing feature vector obj requires update + permissions. + :param run_config: function and/or run configuration for remote jobs/services + :param impute_policy: a dict with `impute_policy` per feature, the dict key is the feature name and the dict + value indicate which value will be used in case the feature is NaN/empty, the replaced + value can be fixed number for constants or $mean, $max, $min, $std, $count + for statistical + values. 
"*" is used to specify the default for all features, example: `{"*": "$mean"}` + :param fixed_window_type: determines how to query the fixed window values which were previously inserted by ingest + :param update_stats: update features statistics from the requested feature sets on the vector. + Default: False. + :param entity_keys: Entity list of the first feature_set in the vector. + The indexes that are used to query the online service. + :return: Initialize the `OnlineVectorService`. + Will be used in subclasses where `support_online=True`. """ if isinstance(feature_vector, FeatureVector): update_stats = True @@ -294,17 +303,15 @@ def get_online_feature_service( if impute_policy and not feature_vector.status.stats: update_stats = True - graph, index_columns = init_feature_vector_graph( - feature_vector, fixed_window_type, update_stats=update_stats - ) - service = OnlineVectorService( - feature_vector, graph, index_columns, impute_policy=impute_policy - ) - service.initialize() - + engine_args = {"impute_policy": impute_policy} + merger_engine = get_merger("storey") # todo: support remote service (using remote nuclio/mlrun function if run_config) - return service + merger = merger_engine(feature_vector, **engine_args) + + return merger.init_online_vector_service( + entity_keys, fixed_window_type, update_stats=update_stats + ) def _rename_source_dataframe_columns(df): @@ -889,6 +896,8 @@ def _ingest_with_spark( if featureset.spec.graph and featureset.spec.graph.steps: df = run_spark_graph(df, featureset, namespace, spark) + df.persist() + if isinstance(df, Response) and df.status_code != 0: mlrun.errors.raise_for_status_code(df.status_code, df.body.split(": ")[1]) _infer_from_static_df(df, featureset, options=infer_options) diff --git a/mlrun/feature_store/feature_set.py b/mlrun/feature_store/feature_set.py index c43410e222..92c2a2bab5 100644 --- a/mlrun/feature_store/feature_set.py +++ b/mlrun/feature_store/feature_set.py @@ -477,7 +477,7 @@ def set_targets( 
targets = targets or [] if with_defaults: self.spec.with_default_targets = True - targets.extend(get_default_targets()) + targets.extend(get_default_targets(offline_only=self.spec.passthrough)) else: self.spec.with_default_targets = False @@ -944,8 +944,9 @@ def to_dataframe( ) df = self.spec.source.to_dataframe( columns=columns, - start_time=start_time, - end_time=end_time, + start_time=start_time + or pd.Timestamp.min, # overwrite `source.start_time` when the source is schedule. + end_time=end_time or pd.Timestamp.max, time_field=time_column, **kwargs, ) diff --git a/mlrun/feature_store/feature_vector.py b/mlrun/feature_store/feature_vector.py index ebfc00870a..c8a97e9c2d 100644 --- a/mlrun/feature_store/feature_vector.py +++ b/mlrun/feature_store/feature_vector.py @@ -384,13 +384,23 @@ def add_feature(name, alias, feature_set_object, feature_set_full_name): class OnlineVectorService: """get_online_feature_service response object""" - def __init__(self, vector, graph, index_columns, impute_policy: dict = None): + def __init__( + self, + vector, + graph, + index_columns, + all_fs_entities: List[str] = None, + impute_policy: dict = None, + requested_columns: List[str] = None, + ): self.vector = vector self.impute_policy = impute_policy or {} self._controller = graph.controller self._index_columns = index_columns + self._all_fs_entities = all_fs_entities self._impute_values = {} + self._requested_columns = requested_columns def __enter__(self): return self @@ -493,40 +503,39 @@ def get(self, entity_rows: List[Union[dict, list]], as_list=False): for row in entity_rows: futures.append(self._controller.emit(row, return_awaitable_result=True)) - requested_columns = list(self.vector.status.features.keys()) - aliases = self.vector.get_feature_aliases() - for i, column in enumerate(requested_columns): - requested_columns[i] = aliases.get(column, column) - for future in futures: result = future.await_result() data = result.body - for key in self._index_columns: - if data 
and key in data: - del data[key] - if not data: - data = None - else: + if data: actual_columns = data.keys() - for column in requested_columns: + if all([col in self._index_columns for col in actual_columns]): + # didn't get any data from the graph + results.append(None) + continue + for column in self._requested_columns: if ( column not in actual_columns and column != self.vector.status.label_column ): data[column] = None - if self._impute_values and data: - for name in data.keys(): - v = data[name] - if v is None or (type(v) == float and (np.isinf(v) or np.isnan(v))): - data[name] = self._impute_values.get(name, v) - for name in list(self.vector.spec.entity_fields.keys()): - data.pop(name, None) + if self._impute_values: + for name in data.keys(): + v = data[name] + if v is None or ( + type(v) == float and (np.isinf(v) or np.isnan(v)) + ): + data[name] = self._impute_values.get(name, v) + if not self.vector.spec.with_indexes: + for name in self._all_fs_entities: + data.pop(name, None) + if not any(data.values()): + data = None if as_list and data: data = [ data.get(key, None) - for key in requested_columns + for key in self._requested_columns if key != self.vector.status.label_column ] results.append(data) diff --git a/mlrun/feature_store/retrieval/__init__.py b/mlrun/feature_store/retrieval/__init__.py index 70be80b854..134d874ee6 100644 --- a/mlrun/feature_store/retrieval/__init__.py +++ b/mlrun/feature_store/retrieval/__init__.py @@ -16,13 +16,14 @@ from .dask_merger import DaskFeatureMerger from .job import run_merge_job # noqa from .local_merger import LocalFeatureMerger -from .online import init_feature_vector_graph # noqa from .spark_merger import SparkFeatureMerger +from .storey_merger import StoreyFeatureMerger mergers = { "local": LocalFeatureMerger, "dask": DaskFeatureMerger, "spark": SparkFeatureMerger, + "storey": StoreyFeatureMerger, } diff --git a/mlrun/feature_store/retrieval/base.py b/mlrun/feature_store/retrieval/base.py index 
1bd4e34eb4..322f663165 100644 --- a/mlrun/feature_store/retrieval/base.py +++ b/mlrun/feature_store/retrieval/base.py @@ -31,6 +31,12 @@ class BaseMerger(abc.ABC): """abstract feature merger class""" + # In order to be an online merger, the merger should implement `init_online_vector_service` function. + support_online = False + + # In order to be an offline merger, the merger should implement + # `_order_by`, `_filter`, `_drop_columns_from_result`, `_rename_columns_and_select`, `_get_engine_df` functions. + support_offline = False engine = None def __init__(self, vector, **engine_args): @@ -45,6 +51,7 @@ def __init__(self, vector, **engine_args): self._target = None self._alias = dict() self._origin_alias = dict() + self._entity_rows_node_name = "__mlrun__$entity_rows$" def _append_drop_column(self, key): if key and key not in self._drop_columns: @@ -117,7 +124,7 @@ def start( # if end_time is not specified set it to now() end_time = pd.Timestamp.now() - return self._generate_vector( + return self._generate_offline_vector( entity_rows, entity_timestamp_column, feature_set_objects=feature_set_objects, @@ -129,8 +136,7 @@ def start( order_by=order_by, ) - def _write_to_target(self): - self.vector.spec.with_indexes = not self._drop_indexes + def _write_to_offline_target(self): if self._target: is_persistent_vector = self.vector.metadata.name is not None if not self._target.path and not is_persistent_vector: @@ -143,7 +149,7 @@ def _write_to_target(self): target_status = self._target.update_resource_status("ready", size=size) logger.info(f"wrote target: {target_status}") self.vector.save() - if self.vector.spec.with_indexes: + if not self._drop_indexes: self.vector.spec.entity_fields = [ Feature(name=feature, value_type=self._result_df[feature].dtype) if self._result_df[feature].dtype.name != "object" @@ -169,7 +175,7 @@ def _set_indexes(self, df): else: df.reset_index(drop=True, inplace=True) - def _generate_vector( + def _generate_offline_vector( self, entity_rows, 
entity_timestamp_column, @@ -287,7 +293,7 @@ def _generate_vector( "start_time and end_time can only be provided in conjunction with " "a timestamp column, or when the at least one feature_set has a timestamp key" ) - # convert pandas entity_rows to spark DF if needed + # convert pandas entity_rows to spark\dask DF if needed if ( entity_rows is not None and not hasattr(entity_rows, "rdd") @@ -356,9 +362,25 @@ def _generate_vector( ) self._order_by(order_by_active) - self._write_to_target() + self._write_to_offline_target() return OfflineVectorResponse(self) + def init_online_vector_service( + self, entity_keys, fixed_window_type, update_stats=False + ): + """ + initialize the `OnlineVectorService` + + :param entity_keys: list of the feature_vector indexes. + :param fixed_window_type: determines how to query the fixed window values which were previously + inserted by ingest + :param update_stats: update features statistics from the requested feature sets on the vector. + Default: False. + + :return: `OnlineVectorService` + """ + raise NotImplementedError + def _unpersist_df(self, df): pass @@ -415,7 +437,6 @@ def merge( self._result_df = merged_df return entity_timestamp_column - @abc.abstractmethod def _asof_join( self, entity_df, @@ -427,7 +448,6 @@ def _asof_join( ): raise NotImplementedError("_asof_join() operation not implemented in class") - @abc.abstractmethod def _join( self, entity_df, @@ -558,10 +578,11 @@ def concat(self, other): self.add_last(other_node) node = other_node - @staticmethod - def _create_linked_relation_list(feature_set_objects, feature_set_fields): + def _create_linked_relation_list( + self, feature_set_objects, feature_set_fields, entity_rows_keys=None + ): feature_set_names = list(feature_set_fields.keys()) - if len(feature_set_names) == 1: + if len(feature_set_names) == 1 and not entity_rows_keys: return BaseMerger._LinkedList( head=BaseMerger._Node( name=feature_set_names[0], @@ -660,8 +681,42 @@ def _build_relation( 
linked_list_relation.head.data["save_index"] = keys return linked_list_relation + def _build_entity_rows_relation(entity_rows_relation, fs_name, fs_order): + feature_set_entity_list = feature_set_entity_list_dict[fs_name] + feature_set_entity_list_names = list(feature_set_entity_list.keys()) + + if all([ent in entity_rows_keys for ent in feature_set_entity_list_names]): + # add to the link list feature set according to indexes match, + # only if all entities in the feature set exist in the entity rows + keys = feature_set_entity_list_names + entity_rows_relation.add_last( + BaseMerger._Node( + fs_name, + data={ + "left_keys": keys, + "right_keys": keys, + "save_cols": [], + "save_index": keys, + }, + order=fs_order, + ) + ) + entity_rows_relation.head.data["save_index"] = keys + + if entity_rows_keys is not None: + entity_rows_linked_relation = _create_relation( + self._entity_rows_node_name, -1 + ) + relation_linked_lists.append(entity_rows_linked_relation) + linked_list_len_goal = len(feature_set_objects) + 1 + else: + entity_rows_linked_relation = None + linked_list_len_goal = len(feature_set_objects) + for i, name in enumerate(feature_set_names): linked_relation = _create_relation(name, i) + if entity_rows_linked_relation is not None: + _build_entity_rows_relation(entity_rows_linked_relation, name, i) for j, name_in in enumerate(feature_set_names): if name != name_in: linked_relation = _build_relation(name_in, j, linked_relation, i) @@ -672,26 +727,23 @@ def _build_relation( return_relation = relation_linked_lists[i].__copy__() for relation_list in relation_linked_lists: return_relation.concat(relation_list) - if return_relation.len == len(feature_set_objects): + if return_relation.len == linked_list_len_goal: return return_relation raise mlrun.errors.MLRunRuntimeError("Failed to merge") - @classmethod def get_default_image(cls, kind): return mlrun.mlconf.feature_store.default_job_image def _reset_index(self, _result_df): raise NotImplementedError - 
@abc.abstractmethod def _create_engine_env(self): """ initialize engine env if needed """ raise NotImplementedError - @abc.abstractmethod def _get_engine_df( self, feature_set: FeatureSet, @@ -715,7 +767,6 @@ def _get_engine_df( """ raise NotImplementedError - @abc.abstractmethod def _rename_columns_and_select( self, df, @@ -733,14 +784,12 @@ def _rename_columns_and_select( """ raise NotImplementedError - @abc.abstractmethod def _drop_columns_from_result(self): """ drop `self._drop_columns` from `self._result_df` """ raise NotImplementedError - @abc.abstractmethod def _filter(self, query: str): """ filter `self._result_df` by `query` @@ -749,7 +798,6 @@ def _filter(self, query: str): """ raise NotImplementedError - @abc.abstractmethod def _order_by(self, order_by_active: typing.List[str]): """ Order by `order_by_active` along all axis. diff --git a/mlrun/feature_store/retrieval/dask_merger.py b/mlrun/feature_store/retrieval/dask_merger.py index d0ad11df14..8f6a4db755 100644 --- a/mlrun/feature_store/retrieval/dask_merger.py +++ b/mlrun/feature_store/retrieval/dask_merger.py @@ -25,6 +25,7 @@ class DaskFeatureMerger(BaseMerger): engine = "dask" + support_offline = True def __init__(self, vector, **engine_args): super().__init__(vector, **engine_args) diff --git a/mlrun/feature_store/retrieval/job.py b/mlrun/feature_store/retrieval/job.py index a08687e446..7ad119e4a2 100644 --- a/mlrun/feature_store/retrieval/job.py +++ b/mlrun/feature_store/retrieval/job.py @@ -130,15 +130,16 @@ def set_default_resources(resources, setter_function): watch=run_config.watch, ) logger.info(f"feature vector merge job started, run id = {run.uid()}") - return RemoteVectorResponse(vector, run) + return RemoteVectorResponse(vector, run, with_indexes) class RemoteVectorResponse: """get_offline_features response object""" - def __init__(self, vector, run): + def __init__(self, vector, run, with_indexes=False): self.run = run self.vector = vector + self.with_indexes = with_indexes or 
self.vector.spec.with_indexes @property def status(self): @@ -164,7 +165,7 @@ def to_dataframe(self, columns=None, df_module=None, **kwargs): df = mlrun.get_dataitem(self.target_uri).as_df( columns=columns, df_module=df_module, format=file_format, **kwargs ) - if self.vector.spec.with_indexes: + if self.with_indexes: df.set_index( list(self.vector.spec.entity_fields.keys()), inplace=True, drop=True ) diff --git a/mlrun/feature_store/retrieval/local_merger.py b/mlrun/feature_store/retrieval/local_merger.py index bd18c66e24..0ec90f7a39 100644 --- a/mlrun/feature_store/retrieval/local_merger.py +++ b/mlrun/feature_store/retrieval/local_merger.py @@ -21,6 +21,7 @@ class LocalFeatureMerger(BaseMerger): engine = "local" + support_offline = True def __init__(self, vector, **engine_args): super().__init__(vector, **engine_args) diff --git a/mlrun/feature_store/retrieval/online.py b/mlrun/feature_store/retrieval/online.py deleted file mode 100644 index aaa4008fb2..0000000000 --- a/mlrun/feature_store/retrieval/online.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright 2023 Iguazio -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import mlrun -from mlrun.datastore.store_resources import ResourceCache -from mlrun.datastore.targets import get_online_target -from mlrun.serving.server import create_graph_server - - -def _build_feature_vector_graph( - vector, - feature_set_fields, - feature_set_objects, - fixed_window_type, -): - graph = vector.spec.graph.copy() - start_states, default_final_state, responders = graph.check_and_process_graph( - allow_empty=True - ) - next = graph - - for name, columns in feature_set_fields.items(): - featureset = feature_set_objects[name] - column_names = [name for name, alias in columns] - aliases = {name: alias for name, alias in columns if alias} - - entity_list = list(featureset.spec.entities.keys()) - next = next.to( - "storey.QueryByKey", - f"query-{name}", - features=column_names, - table=featureset.uri, - key_field=entity_list, - aliases=aliases, - fixed_window_type=fixed_window_type.to_qbk_fixed_window_type(), - ) - for name in start_states: - next.set_next(name) - - if not start_states: # graph was empty - next.respond() - elif not responders and default_final_state: # graph has clear state sequence - graph[default_final_state].respond() - elif not responders: - raise mlrun.errors.MLRunInvalidArgumentError( - "the graph doesnt have an explicit final step to respond on" - ) - return graph - - -def init_feature_vector_graph(vector, query_options, update_stats=False): - try: - from storey import SyncEmitSource - except ImportError as exc: - raise ImportError(f"storey not installed, use pip install storey, {exc}") - - feature_set_objects, feature_set_fields = vector.parse_features( - offline=False, update_stats=update_stats - ) - if not feature_set_fields: - raise mlrun.errors.MLRunRuntimeError( - f"No features found for feature vector '{vector.metadata.name}'" - ) - graph = _build_feature_vector_graph( - vector, feature_set_fields, feature_set_objects, query_options - ) - graph.set_flow_source(SyncEmitSource()) - server = 
create_graph_server(graph=graph, parameters={}) - - cache = ResourceCache() - index_columns = [] - for featureset in feature_set_objects.values(): - driver = get_online_target(featureset) - if not driver: - raise mlrun.errors.MLRunInvalidArgumentError( - f"resource {featureset.uri} does not have an online data target" - ) - cache.cache_table(featureset.uri, driver.get_table_object()) - for key in featureset.spec.entities.keys(): - if not vector.spec.with_indexes and key not in index_columns: - index_columns.append(key) - server.init_states(context=None, namespace=None, resource_cache=cache) - server.init_object(None) - return graph, index_columns diff --git a/mlrun/feature_store/retrieval/spark_merger.py b/mlrun/feature_store/retrieval/spark_merger.py index 47ce7c053a..157a164263 100644 --- a/mlrun/feature_store/retrieval/spark_merger.py +++ b/mlrun/feature_store/retrieval/spark_merger.py @@ -23,6 +23,7 @@ class SparkFeatureMerger(BaseMerger): engine = "spark" + support_offline = True def __init__(self, vector, **engine_args): super().__init__(vector, **engine_args) @@ -197,6 +198,7 @@ def _get_engine_df( end_time=None, time_column=None, ): + source_kwargs = {} if feature_set.spec.passthrough: if not feature_set.spec.source: raise mlrun.errors.MLRunNotFoundError( @@ -204,6 +206,7 @@ def _get_engine_df( ) source_kind = feature_set.spec.source.kind source_path = feature_set.spec.source.path + source_kwargs.update(feature_set.spec.source.attributes) else: target = get_offline_target(feature_set) if not target: @@ -223,6 +226,7 @@ def _get_engine_df( time_field=time_column, start_time=start_time, end_time=end_time, + **source_kwargs, ) columns = column_names + [ent.name for ent in feature_set.spec.entities] diff --git a/mlrun/feature_store/retrieval/storey_merger.py b/mlrun/feature_store/retrieval/storey_merger.py new file mode 100644 index 0000000000..6a87a426dd --- /dev/null +++ b/mlrun/feature_store/retrieval/storey_merger.py @@ -0,0 +1,173 @@ +# Copyright 2018 
Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import mlrun +from mlrun.datastore.store_resources import ResourceCache +from mlrun.datastore.targets import get_online_target +from mlrun.serving.server import create_graph_server + +from ..feature_vector import OnlineVectorService +from .base import BaseMerger + + +class StoreyFeatureMerger(BaseMerger): + engine = "storey" + support_online = True + + def __init__(self, vector, **engine_args): + super().__init__(vector, **engine_args) + self.impute_policy = engine_args.get("impute_policy") + + def _generate_online_feature_vector_graph( + self, + entity_keys, + feature_set_fields, + feature_set_objects, + fixed_window_type, + ): + graph = self.vector.spec.graph.copy() + start_states, default_final_state, responders = graph.check_and_process_graph( + allow_empty=True + ) + next = graph + + fs_link_list = self._create_linked_relation_list( + feature_set_objects, feature_set_fields, entity_keys + ) + + all_columns = [] + save_column = [] + entity_keys = [] + end_aliases = {} + for node in fs_link_list: + name = node.name + if name == self._entity_rows_node_name: + continue + featureset = feature_set_objects[name] + columns = feature_set_fields[name] + column_names = [name for name, alias in columns] + aliases = {name: alias for name, alias in columns if alias} + all_columns += [aliases.get(name, name) for name in column_names] + for col in node.data["save_cols"]: + if col not in column_names: + 
column_names.append(col) + else: + save_column.append(col) + + entity_list = node.data["right_keys"] or list( + featureset.spec.entities.keys() + ) + if not entity_keys: + # if entity_keys not provided by the user we will set it to be the entity of the first feature set + entity_keys = entity_list + end_aliases.update( + { + k: v + for k, v in zip(entity_list, node.data["left_keys"]) + if k != v and v in save_column + } + ) + mapping = { + k: v for k, v in zip(node.data["left_keys"], entity_list) if k != v + } + if mapping: + next = next.to( + "storey.Rename", + f"rename-{name}", + mapping=mapping, + ) + + next = next.to( + "storey.QueryByKey", + f"query-{name}", + features=column_names, + table=featureset.uri, + key_field=entity_list, + aliases=aliases, + fixed_window_type=fixed_window_type.to_qbk_fixed_window_type(), + ) + if end_aliases: + # run if the user want to save a column that related to another entity + next = next.to( + "storey.Rename", + "rename-entity-to-features", + mapping=end_aliases, + ) + for name in start_states: + next.set_next(name) + + if not start_states: # graph was empty + next.respond() + elif not responders and default_final_state: # graph has clear state sequence + graph[default_final_state].respond() + elif not responders: + raise mlrun.errors.MLRunInvalidArgumentError( + "the graph doesnt have an explicit final step to respond on" + ) + return graph, all_columns, entity_keys + + def init_online_vector_service( + self, entity_keys, fixed_window_type, update_stats=False + ): + try: + from storey import SyncEmitSource + except ImportError as exc: + raise ImportError(f"storey not installed, use pip install storey, {exc}") + + feature_set_objects, feature_set_fields = self.vector.parse_features( + offline=False, update_stats=update_stats + ) + if not feature_set_fields: + raise mlrun.errors.MLRunRuntimeError( + f"No features found for feature vector '{self.vector.metadata.name}'" + ) + ( + graph, + requested_columns, + entity_keys, + ) = 
self._generate_online_feature_vector_graph( + entity_keys, + feature_set_fields, + feature_set_objects, + fixed_window_type, + ) + graph.set_flow_source(SyncEmitSource()) + server = create_graph_server(graph=graph, parameters={}) + + cache = ResourceCache() + all_fs_entities = [] + for featureset in feature_set_objects.values(): + driver = get_online_target(featureset) + if not driver: + raise mlrun.errors.MLRunInvalidArgumentError( + f"resource {featureset.uri} does not have an online data target" + ) + cache.cache_table(featureset.uri, driver.get_table_object()) + + for key in featureset.spec.entities.keys(): + if key not in all_fs_entities: + all_fs_entities.append(key) + server.init_states(context=None, namespace=None, resource_cache=cache) + server.init_object(None) + + service = OnlineVectorService( + self.vector, + graph, + entity_keys, + all_fs_entities=all_fs_entities, + impute_policy=self.impute_policy, + requested_columns=requested_columns, + ) + service.initialize() + + return service diff --git a/mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py b/mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py index 473615cae8..a8bd1fc650 100644 --- a/mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +++ b/mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py @@ -33,7 +33,6 @@ class CalibrationCurvePlan(MLPlotPlan): def __init__( self, - normalize: bool = False, n_bins: int = 5, strategy: str = "uniform", ): @@ -43,14 +42,11 @@ def __init__( To read more about the parameters, head to the SciKit-Learn docs at: https://scikit-learn.org/stable/modules/generated/sklearn.calibration.calibration_curve.html - :param normalize: Whether the probabilities needs to be normalized into the [0, 1] interval, i.e. is not a - proper probability. :param n_bins: Number of bins to discretize the [0, 1] interval. :param strategy: Strategy used to define the widths of the bins. Can be on of {‘uniform’, ‘quantile’}. Default: "uniform". 
""" # Store the parameters: - self._normalize = normalize self._n_bins = n_bins self._strategy = strategy @@ -94,7 +90,6 @@ def produce( y, y_pred[:, -1], # Take only the second class probabilities (1, not 0). n_bins=self._n_bins, - normalize=self._normalize, strategy=self._strategy, ) diff --git a/mlrun/kfpops.py b/mlrun/kfpops.py index f6a67e9b63..e6ee96e5d4 100644 --- a/mlrun/kfpops.py +++ b/mlrun/kfpops.py @@ -758,21 +758,7 @@ def format_summary_from_kfp_run(kfp_run, project=None, session=None): short_run = { "graph": dag, - "run": { - k: str(v) if v is not None else v - for k, v in kfp_run["run"].items() - if k - in [ - "id", - "name", - "status", - "error", - "created_at", - "scheduled_at", - "finished_at", - "description", - ] - }, + "run": mlrun.utils.helpers.format_run(kfp_run["run"]), } short_run["run"]["project"] = project short_run["run"]["message"] = message diff --git a/mlrun/launcher/base.py b/mlrun/launcher/base.py index bb491b218c..ab315095b9 100644 --- a/mlrun/launcher/base.py +++ b/mlrun/launcher/base.py @@ -38,6 +38,53 @@ class BaseLauncher(abc.ABC): Each context will have its own implementation of the abstract methods while the common logic resides in this class """ + @abc.abstractmethod + def launch( + self, + runtime: "mlrun.runtimes.BaseRuntime", + task: Optional[ + Union["mlrun.run.RunTemplate", "mlrun.run.RunObject", dict] + ] = None, + handler: Optional[Union[str, Callable]] = None, + name: Optional[str] = "", + project: Optional[str] = "", + params: Optional[dict] = None, + inputs: Optional[Dict[str, str]] = None, + out_path: Optional[str] = "", + workdir: Optional[str] = "", + artifact_path: Optional[str] = "", + watch: Optional[bool] = True, + schedule: Optional[ + Union[str, mlrun.common.schemas.schedule.ScheduleCronTrigger] + ] = None, + hyperparams: Dict[str, list] = None, + hyper_param_options: Optional[mlrun.model.HyperParamOptions] = None, + verbose: Optional[bool] = None, + scrape_metrics: Optional[bool] = None, + 
local_code_path: Optional[str] = None, + auto_build: Optional[bool] = None, + param_file_secrets: Optional[Dict[str, str]] = None, + notifications: Optional[List[mlrun.model.Notification]] = None, + returns: Optional[List[Union[str, Dict[str, str]]]] = None, + ) -> "mlrun.run.RunObject": + """run the function from the server/client[local/remote]""" + pass + + @staticmethod + @abc.abstractmethod + def enrich_runtime( + runtime: "mlrun.runtimes.base.BaseRuntime", + project_name: Optional[str] = "", + ): + pass + + @staticmethod + @abc.abstractmethod + def _store_function( + runtime: "mlrun.runtimes.BaseRuntime", run: "mlrun.run.RunObject" + ): + pass + def save_function( self, runtime: "mlrun.runtimes.BaseRuntime", @@ -73,36 +120,9 @@ def save_function( hash_key = hash_key if versioned else None return "db://" + runtime._function_uri(hash_key=hash_key, tag=tag) - @abc.abstractmethod - def launch( - self, - runtime: "mlrun.runtimes.BaseRuntime", - task: Optional[ - Union["mlrun.run.RunTemplate", "mlrun.run.RunObject", dict] - ] = None, - handler: Optional[Union[str, Callable]] = None, - name: Optional[str] = "", - project: Optional[str] = "", - params: Optional[dict] = None, - inputs: Optional[Dict[str, str]] = None, - out_path: Optional[str] = "", - workdir: Optional[str] = "", - artifact_path: Optional[str] = "", - watch: Optional[bool] = True, - schedule: Optional[ - Union[str, mlrun.common.schemas.schedule.ScheduleCronTrigger] - ] = None, - hyperparams: Dict[str, list] = None, - hyper_param_options: Optional[mlrun.model.HyperParamOptions] = None, - verbose: Optional[bool] = None, - scrape_metrics: Optional[bool] = None, - local_code_path: Optional[str] = None, - auto_build: Optional[bool] = None, - param_file_secrets: Optional[Dict[str, str]] = None, - notifications: Optional[List[mlrun.model.Notification]] = None, - returns: Optional[List[Union[str, Dict[str, str]]]] = None, - ) -> "mlrun.run.RunObject": - """run the function from the 
server/client[local/remote]""" + @staticmethod + def prepare_image_for_deploy(runtime: "mlrun.runtimes.BaseRuntime"): + """Check if the runtime requires to build the image and updates the spec accordingly""" pass def _validate_runtime( @@ -190,8 +210,8 @@ def _create_run_object(task): # task is already a RunObject return task + @staticmethod def _enrich_run( - self, runtime, run, handler=None, @@ -379,26 +399,6 @@ def _wrap_run_result( def _refresh_function_metadata(runtime: "mlrun.runtimes.BaseRuntime"): pass - @staticmethod - def prepare_image_for_deploy(runtime: "mlrun.runtimes.BaseRuntime"): - """Check if the runtime requires to build the image and updates the spec accordingly""" - pass - - @staticmethod - @abc.abstractmethod - def enrich_runtime( - runtime: "mlrun.runtimes.base.BaseRuntime", - project_name: Optional[str] = "", - ): - pass - - @staticmethod - @abc.abstractmethod - def _store_function( - runtime: "mlrun.runtimes.BaseRuntime", run: "mlrun.run.RunObject" - ): - pass - @staticmethod def _log_track_results( runtime: "mlrun.runtimes.BaseRuntime", result: dict, run: "mlrun.run.RunObject" diff --git a/mlrun/launcher/client.py b/mlrun/launcher/client.py index c4024740e8..1774da066a 100644 --- a/mlrun/launcher/client.py +++ b/mlrun/launcher/client.py @@ -19,14 +19,14 @@ import IPython import mlrun.errors -import mlrun.launcher.base +import mlrun.launcher.base as launcher import mlrun.lists import mlrun.model import mlrun.runtimes from mlrun.utils import logger -class ClientBaseLauncher(mlrun.launcher.base.BaseLauncher, abc.ABC): +class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC): """ Abstract class for common code between client launchers """ diff --git a/mlrun/launcher/local.py b/mlrun/launcher/local.py index bbf63f64bb..8f4900f781 100644 --- a/mlrun/launcher/local.py +++ b/mlrun/launcher/local.py @@ -17,7 +17,7 @@ import mlrun.common.schemas.schedule import mlrun.errors -import mlrun.launcher.client +import mlrun.launcher.client as launcher 
import mlrun.run import mlrun.runtimes.generators import mlrun.utils.clones @@ -25,7 +25,7 @@ from mlrun.utils import logger -class ClientLocalLauncher(mlrun.launcher.client.ClientBaseLauncher): +class ClientLocalLauncher(launcher.ClientBaseLauncher): """ ClientLocalLauncher is a launcher that runs the job locally. Either on the user's machine (_is_run_local is True) or on a remote machine (_is_run_local is False). @@ -119,14 +119,14 @@ def launch( notifications=notifications, ) self._validate_runtime(runtime, run) - result = self.execute( + result = self._execute( runtime=runtime, run=run, ) return result - def execute( + def _execute( self, runtime: "mlrun.runtimes.BaseRuntime", run: Optional[Union["mlrun.run.RunTemplate", "mlrun.run.RunObject"]] = None, diff --git a/mlrun/launcher/remote.py b/mlrun/launcher/remote.py index 463d67ddbf..0f3d1bfcfc 100644 --- a/mlrun/launcher/remote.py +++ b/mlrun/launcher/remote.py @@ -19,7 +19,7 @@ import mlrun.common.schemas.schedule import mlrun.db import mlrun.errors -import mlrun.launcher.client +import mlrun.launcher.client as launcher import mlrun.run import mlrun.runtimes import mlrun.runtimes.generators @@ -28,7 +28,7 @@ from mlrun.utils import logger -class ClientRemoteLauncher(mlrun.launcher.client.ClientBaseLauncher): +class ClientRemoteLauncher(launcher.ClientBaseLauncher): def launch( self, runtime: "mlrun.runtimes.KubejobRuntime", @@ -106,9 +106,9 @@ def launch( ) self._store_function(runtime, run) - return self.submit_job(runtime, run, schedule, watch) + return self._submit_job(runtime, run, schedule, watch) - def submit_job( + def _submit_job( self, runtime: "mlrun.runtimes.KubejobRuntime", run: "mlrun.run.RunObject", diff --git a/mlrun/lists.py b/mlrun/lists.py index 45b0e6c278..3a9be47f2a 100644 --- a/mlrun/lists.py +++ b/mlrun/lists.py @@ -92,7 +92,9 @@ def to_rows(self, extend_iterations=False): return [list_header] + rows - def to_df(self, flat=False, extend_iterations=False, cache=True): + def to_df( + self, 
flat: bool = False, extend_iterations: bool = False, cache: bool = True + ) -> pd.DataFrame: """convert the run list to a dataframe""" if hasattr(self, "_df") and cache: return self._df @@ -179,10 +181,16 @@ def to_rows(self): "producer": ["producer", "spec.producer"], "sources": ["sources", "spec.sources"], "labels": ["labels", "metadata.labels"], + # important: the uri item must be the last one in this dict since there is no artifact.uri, and we fill it + # in the following for loop as the "last_index" in the dict + "uri": ["uri", "uri"], } for artifact in self: fields_index = 0 if is_legacy_artifact(artifact) else 1 row = [get_in(artifact, v[fields_index], "") for k, v in head.items()] + artifact_uri = dict_to_artifact(artifact).uri + last_index = len(row) - 1 + row[last_index] = artifact_uri rows.append(row) return [head.keys()] + rows diff --git a/mlrun/model.py b/mlrun/model.py index c8f6411221..e6f889ec68 100644 --- a/mlrun/model.py +++ b/mlrun/model.py @@ -549,12 +549,14 @@ def __init__( status=None, sent_time=None, ): - self.kind = kind - self.name = name - self.message = message - self.severity = severity - self.when = when - self.condition = condition + self.kind = kind or mlrun.common.schemas.notification.NotificationKind.slack + self.name = name or "" + self.message = message or "" + self.severity = ( + severity or mlrun.common.schemas.notification.NotificationSeverity.INFO + ) + self.when = when or ["completed"] + self.condition = condition or "" self.params = params or {} self.status = status self.sent_time = sent_time diff --git a/mlrun/model_monitoring/stream_processing_fs.py b/mlrun/model_monitoring/stream_processing_fs.py index 7ecd623e86..bbaef76426 100644 --- a/mlrun/model_monitoring/stream_processing_fs.py +++ b/mlrun/model_monitoring/stream_processing_fs.py @@ -50,19 +50,15 @@ def __init__( parquet_target: str, sample_window: int = 10, parquet_batching_timeout_secs: int = 30 * 60, # Default 30 minutes - aggregate_count_windows: 
typing.Optional[typing.List[str]] = None, - aggregate_count_period: str = "30s", - aggregate_avg_windows: typing.Optional[typing.List[str]] = None, - aggregate_avg_period: str = "30s", + aggregate_windows: typing.Optional[typing.List[str]] = None, + aggregate_period: str = "30s", model_monitoring_access_key: str = None, ): # General configurations, mainly used for the storey steps in the future serving graph self.project = project self.sample_window = sample_window - self.aggregate_count_windows = aggregate_count_windows or ["5m", "1h"] - self.aggregate_count_period = aggregate_count_period - self.aggregate_avg_windows = aggregate_avg_windows or ["5m", "1h"] - self.aggregate_avg_period = aggregate_avg_period + self.aggregate_windows = aggregate_windows or ["5m", "1h"] + self.aggregate_period = aggregate_period # Parquet path and configurations self.parquet_path = parquet_target @@ -202,38 +198,34 @@ def apply_map_feature_names(): # Step 5 - Calculate number of predictions and average latency def apply_storey_aggregations(): - # Step 5.1 - Calculate number of predictions for each window (5 min and 1 hour by default) + # Step 5.1 - Calculate number of predictions and average latency for each window (5 min and 1 hour) graph.add_step( class_name="storey.AggregateByKey", aggregates=[ { - "name": EventFieldType.PREDICTIONS, - "column": EventFieldType.ENDPOINT_ID, - "operations": ["count"], - "windows": self.aggregate_count_windows, - "period": self.aggregate_count_period, + "name": EventFieldType.LATENCY, + "column": EventFieldType.LATENCY, + "operations": ["count", "avg"], + "windows": self.aggregate_windows, + "period": self.aggregate_period, } ], - name=EventFieldType.PREDICTIONS, + name=EventFieldType.LATENCY, after="MapFeatureNames", step_name="Aggregates", table=".", + key_field=EventFieldType.ENDPOINT_ID, ) - # Step 5.2 - Calculate average latency time for each window (5 min and 1 hour by default) + + # Step 5.2 - Rename the latency counter field to prediction 
counter graph.add_step( - class_name="storey.AggregateByKey", - aggregates=[ - { - "name": EventFieldType.LATENCY, - "column": EventFieldType.LATENCY, - "operations": ["avg"], - "windows": self.aggregate_avg_windows, - "period": self.aggregate_avg_period, - } - ], - name=EventFieldType.LATENCY, - after=EventFieldType.PREDICTIONS, - table=".", + class_name="storey.Rename", + mapping={ + "latency_count_5m": EventLiveStats.PREDICTIONS_COUNT_5M, + "latency_count_1h": EventLiveStats.PREDICTIONS_COUNT_1H, + }, + name="Rename", + after=EventFieldType.LATENCY, ) apply_storey_aggregations() @@ -243,7 +235,7 @@ def apply_storey_sample_window(): graph.add_step( "storey.steps.SampleWindow", name="sample", - after=EventFieldType.LATENCY, + after="Rename", window_size=self.sample_window, key=EventFieldType.ENDPOINT_ID, ) diff --git a/mlrun/package/packagers_manager.py b/mlrun/package/packagers_manager.py index 8258d961bb..48a1a434dd 100644 --- a/mlrun/package/packagers_manager.py +++ b/mlrun/package/packagers_manager.py @@ -16,6 +16,7 @@ import inspect import os import shutil +import sys import traceback from typing import Any, Dict, List, Tuple, Type, Union @@ -264,7 +265,11 @@ def unpack(self, data_item: DataItem, type_hint: Type) -> Any: :return: The unpacked object parsed as type hinted. 
""" # Check if `DataItem` is hinted - meaning the user can expect a data item and do not want to unpack it: - if TypeHintUtils.is_matching(object_type=DataItem, type_hint=type_hint): + # TODO: Remove when we'll no longer support Python 3.7: + if sys.version_info[1] < 8: + if self._get_type_name(typ=DataItem) in str(type_hint): + return data_item + elif TypeHintUtils.is_matching(object_type=DataItem, type_hint=type_hint): return data_item # Set variables to hold the manager notes and packager instructions: diff --git a/mlrun/projects/project.py b/mlrun/projects/project.py index 402b8591d7..65b5c69fc2 100644 --- a/mlrun/projects/project.py +++ b/mlrun/projects/project.py @@ -15,6 +15,7 @@ import getpass import glob import http +import importlib.util as imputil import json import os.path import pathlib @@ -114,6 +115,7 @@ def new_project( subpath: str = None, save: bool = True, overwrite: bool = False, + parameters: dict = None, ) -> "MlrunProject": """Create a new MLRun project, optionally load it from a yaml/zip/git template @@ -152,6 +154,7 @@ def new_project( :param save: whether to save the created project in the DB :param overwrite: overwrite project using 'cascade' deletion strategy (deletes project resources) if project with name exists + :param parameters: key/value pairs to add to the project.spec.params :returns: project object """ @@ -193,6 +196,10 @@ def new_project( project.spec.origin_url = url if description: project.spec.description = description + if parameters: + # Enable setting project parameters at load time, can be used to customize the project_setup + for key, val in parameters.items(): + project.spec.params[key] = val _set_as_current_default_project(project) @@ -220,6 +227,11 @@ def new_project( context=context, save=save, ) + if from_template: + # Hook for initializing the project using a project_setup script + setup_file_path = path.join(context, "project_setup.py") + project = _run_project_setup(project, setup_file_path, save) + return 
project @@ -234,6 +246,7 @@ def load_project( user_project: bool = False, save: bool = True, sync_functions: bool = False, + parameters: dict = None, ) -> "MlrunProject": """Load an MLRun project from git or tar or dir @@ -260,6 +273,7 @@ def load_project( :param user_project: add the current user name to the project name (for db:// prefixes) :param save: whether to save the created project and artifact in the DB :param sync_functions: sync the project's functions into the project object (will be saved to the DB if save=True) + :param parameters: key/value pairs to add to the project.spec.params :returns: project object """ @@ -306,6 +320,12 @@ def load_project( if not project.metadata.name: raise ValueError("project name must be specified") + + if parameters: + # Enable setting project parameters at load time, can be used to customize the project_setup + for key, val in parameters.items(): + project.spec.params[key] = val + if not from_db: project.spec.source = url or project.spec.source project.spec.origin_url = url or project.spec.origin_url @@ -320,14 +340,19 @@ def load_project( except Exception: pass - if save and mlrun.mlconf.dbpath: + to_save = save and mlrun.mlconf.dbpath + if to_save: project.save() + + # Hook for initializing the project using a project_setup script + setup_file_path = path.join(context, project.spec.subpath or "", "project_setup.py") + project = _run_project_setup(project, setup_file_path, to_save) + + if to_save: project.register_artifacts() - if sync_functions: - project.sync_functions(names=project.get_function_names(), save=True) - elif sync_functions: - project.sync_functions(names=project.get_function_names(), save=False) + if sync_functions: + project.sync_functions(save=to_save) _set_as_current_default_project(project) @@ -345,6 +370,7 @@ def get_or_create_project( user_project: bool = False, from_template: str = None, save: bool = True, + parameters: dict = None, ) -> "MlrunProject": """Load a project from MLRun DB, or 
create/import if doesnt exist @@ -367,6 +393,8 @@ def get_or_create_project( :param user_project: add the current username to the project name (for db:// prefixes) :param from_template: path to project YAML file that will be used as from_template (for new projects) :param save: whether to save the created project in the DB + :param parameters: key/value pairs to add to the project.spec.params + :returns: project object """ context = context or "./" @@ -386,6 +414,7 @@ def get_or_create_project( user_project=user_project, # only loading project from db so no need to save it save=False, + parameters=parameters, ) logger.info("Project loaded successfully", project_name=name) return project @@ -407,6 +436,7 @@ def get_or_create_project( clone=clone, user_project=user_project, save=save, + parameters=parameters, ) logger.info( @@ -427,14 +457,64 @@ def get_or_create_project( secrets=secrets, subpath=subpath, save=save, + parameters=parameters, ) logger.info("Project created successfully", project_name=name, stored_in_db=save) return project +def _run_project_setup( + project: "MlrunProject", setup_file_path: str, save: bool = False +): + """Run the project setup file if found + + When loading a project MLRun will look for a project_setup.py file, if it is found + it will execute the setup(project) handler, which can enrich the project with additional + objects, functions, artifacts, etc. 
+ + Example:: + + def setup(project): + train_function = project.set_function( + "src/trainer.py", + name="mpi-training", + kind="mpijob", + image="mlrun/ml-models", + ) + # Set the number of replicas for the training from the project parameter + train_function.spec.replicas = project.spec.params.get("num_replicas", 1) + return project + + """ + if not path.exists(setup_file_path): + return project + spec = imputil.spec_from_file_location("workflow", setup_file_path) + if spec is None: + raise ImportError(f"cannot import project setup file in {setup_file_path}") + mod = imputil.module_from_spec(spec) + spec.loader.exec_module(mod) + + if hasattr(mod, "setup"): + try: + project = getattr(mod, "setup")(project) + except Exception as exc: + logger.error( + "Failed to run project_setup script", + setup_file_path=setup_file_path, + exc=mlrun.errors.err_to_str(exc), + ) + raise exc + if save: + project.save() + else: + logger.warn("skipping setup, setup() handler was not found in project_setup.py") + return project + + def _load_project_dir(context, name="", subpath=""): subpath_str = subpath or "" fpath = path.join(context, subpath_str, "project.yaml") + setup_file_path = path.join(context, subpath_str, "project_setup.py") if path.isfile(fpath): with open(fpath) as fp: data = fp.read() @@ -454,6 +534,9 @@ def _load_project_dir(context, name="", subpath=""): }, } ) + elif path.exists(setup_file_path): + # If there is a setup script do not force having project.yaml file + project = MlrunProject() else: raise mlrun.errors.MLRunNotFoundError( "project or function YAML not found in path" @@ -1126,8 +1209,15 @@ def set_workflow( :param ttl: pipeline ttl in secs (after that the pods will be removed) :param args: argument values (key=value, ..) 
""" - if not workflow_path: - raise ValueError("valid workflow_path must be specified") + + # validate the provided workflow_path + if mlrun.utils.helpers.is_file_path_invalid( + self.spec.get_code_path(), workflow_path + ): + raise ValueError( + f"Invalid 'workflow_path': '{workflow_path}'. Please provide a valid URL/path to a file." + ) + if embed: if ( self.context @@ -1211,11 +1301,13 @@ def register_artifacts(self): artifact_path = mlrun.utils.helpers.fill_artifact_path_template( self.spec.artifact_path or mlrun.mlconf.artifact_path, self.metadata.name ) + # TODO: To correctly maintain the list of artifacts from an exported project, + # we need to maintain the different trees that generated them producer = ArtifactProducer( "project", self.metadata.name, self.metadata.name, - tag=self._get_hexsha() or "latest", + tag=self._get_hexsha() or str(uuid.uuid4()), ) for artifact_dict in self.spec.artifacts: if _is_imported_artifact(artifact_dict): @@ -2705,8 +2797,9 @@ def list_artifacts( # check different artifact versions for a specific artifact, return as objects list result_versions = project.list_artifacts('results', tag='*').to_objects() - :param name: Name of artifacts to retrieve. Name is used as a like query, and is not case-sensitive. This means - that querying for ``name`` may return artifacts named ``my_Name_1`` or ``surname``. + :param name: Name of artifacts to retrieve. Name with '~' prefix is used as a like query, and is not + case-sensitive. This means that querying for ``~name`` may return artifacts named + ``my_Name_1`` or ``surname``. :param tag: Return artifacts assigned this tag. :param labels: Return artifacts that have these labels. Labels can either be a dictionary {"label": "value"} or a list of "label=value" (match label key and value) or "label" (match just label key) strings. @@ -2752,8 +2845,9 @@ def list_models( latest_models = project.list_models('', tag='latest') - :param name: Name of artifacts to retrieve. 
Name is used as a like query, and is not case-sensitive. This means - that querying for ``name`` may return artifacts named ``my_Name_1`` or ``surname``. + :param name: Name of artifacts to retrieve. Name with '~' prefix is used as a like query, and is not + case-sensitive. This means that querying for ``~name`` may return artifacts named + ``my_Name_1`` or ``surname``. :param tag: Return artifacts assigned this tag. :param labels: Return artifacts that have these labels. Labels can either be a dictionary {"label": "value"} or a list of "label=value" (match label key and value) or "label" (match just label key) strings. diff --git a/mlrun/render.py b/mlrun/render.py index 06c0e068c4..e5e9781b07 100644 --- a/mlrun/render.py +++ b/mlrun/render.py @@ -12,11 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. import pathlib +import typing import uuid from os import environ, path import pandas as pd +import mlrun.utils + from .config import config from .datastore import uri_to_ipython from .utils import dict_to_list, get_in, is_ipython @@ -72,7 +75,13 @@ def dict_html(x): return "".join([f'
{i}
' for i in dict_to_list(x)]) -def link_to_ipython(link): +def link_to_ipython(link: str): + """ + Convert a link (e.g. v3io path) to a jupyter notebook local link. + + :param link: the link to convert + :return: the converted link and ref for expanding the file in the notebook + """ valid = pathlib.Path(link).suffix in supported_viewers ref = 'class="artifact" onclick="expandPanel(this)" paneName="result" ' if "://" not in link: @@ -95,22 +104,42 @@ def link_html(text, link=""): return f'
{text}
' -def artifacts_html(x, pathcol="path"): - if not x: +def artifacts_html( + artifacts: typing.List[dict], + attribute_name: str = "path", +): + """ + Generate HTML for a list of artifacts. The HTML will be a list of links to the artifacts to be presented in the + jupyter notebook. The links will be clickable and will open the artifact in a new tab. + + :param artifacts: contains a list of artifact dictionaries + :param attribute_name: the attribute of the artifact to use as the link text + :return: the generated HTML + """ + if not artifacts: return "" html = "" - for i in x: - # support legacy format - if pathcol in i: - link, ref = link_to_ipython(i[pathcol]) + + for artifact in artifacts: + # TODO: remove this in 1.5.0 once we no longer support legacy format + if mlrun.utils.is_legacy_artifact(artifact): + attribute_value = artifact.get(attribute_name) else: - link, ref = link_to_ipython(i["spec"][pathcol]) + attribute_value = artifact["spec"].get(attribute_name) - if "key" in i: - key = i["key"] + if mlrun.utils.is_legacy_artifact(artifact): + key = artifact["key"] else: - key = i["metadata"]["key"] + key = artifact["metadata"]["key"] + + if not attribute_value: + mlrun.utils.logger.warning( + "Artifact is incomplete, omitting from output (most likely due to a failed artifact logging)", + artifact_key=key, + ) + continue + link, ref = link_to_ipython(attribute_value) html += f'
{key}
' return html @@ -338,7 +367,12 @@ def get_tblframe(df, display, classes=None): uid_template = '' -def runs_to_html(df, display=True, classes=None, short=False): +def runs_to_html( + df: pd.DataFrame, + display: bool = True, + classes: typing.Optional[typing.Union[str, list, tuple]] = None, + short: bool = False, +): def time_str(x): try: return x.strftime("%b %d %H:%M:%S") @@ -374,17 +408,13 @@ def time_str(x): df["labels"] = df["labels"].apply(dict_html) df["inputs"] = df["inputs"].apply(inputs_html) df["artifacts"] = df["artifacts"].apply( - lambda x: artifacts_html(x, "target_path") + lambda artifacts: artifacts_html(artifacts, "target_path"), ) def expand_error(x): if x["state"] == "error": title = str(x["error"]) - state = f'
' - - # TODO: is this replacement needed? - state.replace('"', "'") - state += f'{x["state"]}
' + state = f'
{x["state"]}
' x["state"] = state return x diff --git a/mlrun/runtimes/base.py b/mlrun/runtimes/base.py index e59d292610..bfff27c0f5 100644 --- a/mlrun/runtimes/base.py +++ b/mlrun/runtimes/base.py @@ -14,6 +14,7 @@ import enum import getpass import http +import re import traceback import warnings from abc import ABC, abstractmethod @@ -28,8 +29,6 @@ from nuclio.build import mlrun_footer from sqlalchemy.orm import Session -import mlrun.api.db.sqldb.session -import mlrun.api.utils.singletons.db import mlrun.common.schemas import mlrun.errors import mlrun.launcher.factory @@ -599,6 +598,12 @@ def _force_handler(self, handler): if not handler: raise RunError(f"handler must be provided for {self.kind} runtime") + def _has_pipeline_param(self) -> bool: + # check if the runtime has pipeline parameters + # https://www.kubeflow.org/docs/components/pipelines/v1/sdk/parameters/ + matches = re.findall(mlrun.utils.regex.pipeline_param[0], self.to_json()) + return bool(matches) + def full_image_path( self, image=None, client_version: str = None, client_python_version: str = None ): @@ -678,10 +683,9 @@ def as_step( :return: KubeFlow containerOp """ - # if self.spec.image and not image: - # image = self.full_image_path() - - if use_db: + # if the function contain KFP PipelineParams (futures) pass the full spec to the + # ContainerOp this way KFP will substitute the params with previous step outputs + if use_db and not self._has_pipeline_param(): # if the same function is built as part of the pipeline we do not use the versioned function # rather the latest function w the same tag so we can pick up the updated image/status versioned = False if hasattr(self, "_build_in_pipeline") else True diff --git a/mlrun/runtimes/function.py b/mlrun/runtimes/function.py index 37b25f700d..f7f66c9b54 100644 --- a/mlrun/runtimes/function.py +++ b/mlrun/runtimes/function.py @@ -86,26 +86,6 @@ def validate_nuclio_version_compatibility(*min_versions): return False -def is_nuclio_version_in_range(min_version: 
str, max_version: str) -> bool: - """ - Return whether the Nuclio version is in the range, inclusive for min, exclusive for max - [min, max) - """ - try: - parsed_min_version = semver.VersionInfo.parse(min_version) - parsed_max_version = semver.VersionInfo.parse(max_version) - nuclio_version = mlrun.runtimes.utils.resolve_nuclio_version() - parsed_current_version = semver.VersionInfo.parse(nuclio_version) - except ValueError: - logger.warning( - "Unable to parse nuclio version, assuming in range", - nuclio_version=nuclio_version, - min_version=min_version, - max_version=max_version, - ) - return True - return parsed_min_version <= parsed_current_version < parsed_max_version - - def min_nuclio_versions(*versions): def decorator(function): def wrapper(*args, **kwargs): @@ -133,6 +113,7 @@ class NuclioSpec(KubeResourceSpec): "source", "function_kind", "readiness_timeout", + "readiness_timeout_before_failure", "function_handler", "nuclio_runtime", "base_image_pull", @@ -164,6 +145,7 @@ def __init__( build=None, service_account=None, readiness_timeout=None, + readiness_timeout_before_failure=None, default_handler=None, node_name=None, node_selector=None, @@ -219,6 +201,7 @@ def __init__( self.nuclio_runtime = None self.no_cache = no_cache self.readiness_timeout = readiness_timeout + self.readiness_timeout_before_failure = readiness_timeout_before_failure self.service_type = service_type self.add_templated_ingress_host_mode = add_templated_ingress_host_mode @@ -808,13 +791,12 @@ def deploy_step( # verify auto mount is applied (with the client credentials) self.try_auto_mount_based_on_config() - # if the function spec contain KFP PipelineParams (futures) pass the full spec to the - # ContainerOp this way KFP will substitute the params with previous step outputs - func_has_pipeline_params = self.to_json().find("{{pipelineparam:op") > 0 if ( use_function_from_db or use_function_from_db is None - and not func_has_pipeline_params + # if the function contain KFP PipelineParams 
(futures) pass the full spec to the + # ContainerOp this way KFP will substitute the params with previous step outputs + and not self._has_pipeline_param() ): url = self.save(versioned=True, refresh=True) else: @@ -842,6 +824,7 @@ def invoke( force_external_address: bool = False, auth_info: AuthInfo = None, mock: bool = None, + **http_client_kwargs, ): """Invoke the remote (live) function and return the results @@ -857,6 +840,9 @@ def invoke( :param force_external_address: use the external ingress URL :param auth_info: service AuthInfo :param mock: use mock server vs a real Nuclio function (for local simulations) + :param http_client_kwargs: allow the user to pass any parameter supported in requests.request method + see this link for more information: + https://requests.readthedocs.io/en/latest/api/#requests.request """ if not method: method = "POST" if body else "GET" @@ -888,15 +874,16 @@ def invoke( self.metadata.name, self.metadata.project, self.metadata.tag ) headers.setdefault("x-nuclio-target", full_function_name) - kwargs = {} + if not http_client_kwargs: + http_client_kwargs = {} if body: if isinstance(body, (str, bytes)): - kwargs["data"] = body + http_client_kwargs["data"] = body else: - kwargs["json"] = body + http_client_kwargs["json"] = body try: logger.info("invoking function", method=method, path=path) - resp = requests.request(method, path, headers=headers, **kwargs) + resp = requests.request(method, path, headers=headers, **http_client_kwargs) except OSError as err: raise OSError( f"error: cannot run function at url {path}, {err_to_str(err)}" diff --git a/mlrun/runtimes/pod.py b/mlrun/runtimes/pod.py index 86907b5145..4de72f74fb 100644 --- a/mlrun/runtimes/pod.py +++ b/mlrun/runtimes/pod.py @@ -360,15 +360,25 @@ def with_limits( patch: bool = False, ): """ - set pod cpu/memory/gpu limits - by default it overrides the whole limits section, if you wish to patch specific resources use `patch=True`. 
+ Set pod cpu/memory/gpu limits (max values) + + :param mem: set limit for memory e.g. '500M', '2G', etc. + :param cpu: set limit for cpu e.g. '0.5', '2', etc. + :param gpus: set limit for gpu + :param gpu_type: set gpu type e.g. "nvidia.com/gpu" + :param patch: by default it overrides the whole limits section, + if you wish to patch specific resources use `patch=True` """ self._verify_and_set_limits("resources", mem, cpu, gpus, gpu_type, patch=patch) def with_requests(self, mem: str = None, cpu: str = None, patch: bool = False): """ - set requested (desired) pod cpu/memory resources - by default it overrides the whole requests section, if you wish to patch specific resources use `patch=True`. + Set requested (desired) pod cpu/memory resources + + :param mem: set request for memory e.g. '200M', '1G', etc. + :param cpu: set request for cpu e.g. '0.1', '1', etc. + :param patch: by default it overrides the whole requests section, + if you wish to patch specific resources use `patch=True` """ self._verify_and_set_requests("resources", mem, cpu, patch) @@ -1041,15 +1051,25 @@ def with_limits( patch: bool = False, ): """ - set pod cpu/memory/gpu limits - by default it overrides the whole limits section, if you wish to patch specific resources use `patch=True`. + Set pod cpu/memory/gpu limits (max values) + + :param mem: set limit for memory e.g. '500M', '2G', etc. + :param cpu: set limit for cpu e.g. '0.5', '2', etc. + :param gpus: set limit for gpu + :param gpu_type: set gpu type e.g. "nvidia.com/gpu" + :param patch: by default it overrides the whole limits section, + if you wish to patch specific resources use `patch=True` """ self.spec.with_limits(mem, cpu, gpus, gpu_type, patch=patch) def with_requests(self, mem: str = None, cpu: str = None, patch: bool = False): """ - set requested (desired) pod cpu/memory resources - by default it overrides the whole requests section, if you wish to patch specific resources use `patch=True`. 
+ Set requested (desired) pod cpu/memory resources + + :param mem: set request for memory e.g. '200M', '1G', etc. + :param cpu: set request for cpu e.g. '0.1', '1', etc. + :param patch: by default it overrides the whole requests section, + if you wish to patch specific resources use `patch=True` """ self.spec.with_requests(mem, cpu, patch=patch) diff --git a/mlrun/runtimes/serving.py b/mlrun/runtimes/serving.py index 5ba3a18979..b989a29443 100644 --- a/mlrun/runtimes/serving.py +++ b/mlrun/runtimes/serving.py @@ -116,6 +116,7 @@ def __init__( function_kind=None, service_account=None, readiness_timeout=None, + readiness_timeout_before_failure=None, models=None, graph=None, parameters=None, @@ -168,6 +169,7 @@ def __init__( function_kind=serving_subkind, service_account=service_account, readiness_timeout=readiness_timeout, + readiness_timeout_before_failure=readiness_timeout_before_failure, build=build, node_name=node_name, node_selector=node_selector, diff --git a/mlrun/runtimes/utils.py b/mlrun/runtimes/utils.py index 4260c386b7..7b39d9de9f 100644 --- a/mlrun/runtimes/utils.py +++ b/mlrun/runtimes/utils.py @@ -24,10 +24,9 @@ from kubernetes import client import mlrun -import mlrun.api.utils.builder import mlrun.common.constants +import mlrun.common.schemas import mlrun.utils.regex -from mlrun.api.utils.clients import nuclio from mlrun.errors import err_to_str from mlrun.frameworks.parallel_coordinates import gen_pcp_plot from mlrun.runtimes.constants import MPIJobCRDVersions @@ -61,7 +60,6 @@ def set(self, context): cached_mpijob_crd_version = None -cached_nuclio_version = None # resolve mpijob runtime according to the mpi-operator's supported crd-version @@ -118,29 +116,6 @@ def resolve_spark_operator_version(): raise ValueError("Failed to resolve spark operator's version") -# if nuclio version specified on mlrun config set it likewise, -# if not specified, get it from nuclio api client -# since this is a heavy operation (sending requests to API), and it's 
unlikely that the version -# will change - cache it (this means if we upgrade nuclio, we need to restart mlrun to re-fetch the new version) -def resolve_nuclio_version(): - global cached_nuclio_version - - if not cached_nuclio_version: - - # config override everything - nuclio_version = config.nuclio_version - if not nuclio_version and config.nuclio_dashboard_url: - try: - nuclio_client = nuclio.Client() - nuclio_version = nuclio_client.get_dashboard_version() - except Exception as exc: - logger.warning("Failed to resolve nuclio version", exc=err_to_str(exc)) - - cached_nuclio_version = nuclio_version - - return cached_nuclio_version - - def calc_hash(func, tag=""): # remove tag, hash, date from calculation tag = tag or func.metadata.tag @@ -208,8 +183,18 @@ def add_code_metadata(path=""): ] if len(remotes) > 0: return f"{remotes[0]}#{repo.head.commit.hexsha}" - except (GitCommandNotFound, InvalidGitRepositoryError, NoSuchPathError, ValueError): - pass + + except ( + GitCommandNotFound, + InvalidGitRepositoryError, + NoSuchPathError, + ValueError, + ) as exc: + logger.warning( + "Failed to add git metadata, ignore if path is not part of a git repo.", + path=path, + error=err_to_str(exc), + ) return None @@ -474,20 +459,26 @@ def verify_limits( verify_field_regex( f"function.spec.{resources_field_name}.limits.memory", mem, - mlrun.utils.regex.k8s_resource_quantity_regex, + mlrun.utils.regex.k8s_resource_quantity_regex + + mlrun.utils.regex.pipeline_param, + mode=mlrun.common.schemas.RegexMatchModes.any, ) if cpu: verify_field_regex( f"function.spec.{resources_field_name}.limits.cpu", cpu, - mlrun.utils.regex.k8s_resource_quantity_regex, + mlrun.utils.regex.k8s_resource_quantity_regex + + mlrun.utils.regex.pipeline_param, + mode=mlrun.common.schemas.RegexMatchModes.any, ) # https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/ if gpus: verify_field_regex( f"function.spec.{resources_field_name}.limits.gpus", gpus, - 
mlrun.utils.regex.k8s_resource_quantity_regex, + mlrun.utils.regex.k8s_resource_quantity_regex + + mlrun.utils.regex.pipeline_param, + mode=mlrun.common.schemas.RegexMatchModes.any, ) return generate_resources(mem=mem, cpu=cpu, gpus=gpus, gpu_type=gpu_type) @@ -501,13 +492,17 @@ def verify_requests( verify_field_regex( f"function.spec.{resources_field_name}.requests.memory", mem, - mlrun.utils.regex.k8s_resource_quantity_regex, + mlrun.utils.regex.k8s_resource_quantity_regex + + mlrun.utils.regex.pipeline_param, + mode=mlrun.common.schemas.RegexMatchModes.any, ) if cpu: verify_field_regex( f"function.spec.{resources_field_name}.requests.cpu", cpu, - mlrun.utils.regex.k8s_resource_quantity_regex, + mlrun.utils.regex.k8s_resource_quantity_regex + + mlrun.utils.regex.pipeline_param, + mode=mlrun.common.schemas.RegexMatchModes.any, ) return generate_resources(mem=mem, cpu=cpu) diff --git a/mlrun/serving/states.py b/mlrun/serving/states.py index c387284311..be58d3c2d5 100644 --- a/mlrun/serving/states.py +++ b/mlrun/serving/states.py @@ -291,11 +291,12 @@ def to( ): """add a step right after this step and return the new step - example, a 4 step pipeline ending with a stream: - graph.to('URLDownloader')\ - .to('ToParagraphs')\ - .to(name='to_json', handler='json.dumps')\ - .to('>>', 'to_v3io', path=stream_path)\ + example: + a 4-step pipeline ending with a stream: + graph.to('URLDownloader')\ + .to('ToParagraphs')\ + .to(name='to_json', handler='json.dumps')\ + .to('>>', 'to_v3io', path=stream_path)\ :param class_name: class name or step object to build the step from for router steps the class name should start with '*' @@ -306,7 +307,7 @@ def to( :param function: function this step should run in :param full_event: this step accepts the full event (not just body) :param input_path: selects the key/path in the event to use as input to the step - this require that the event body will behave like a dict, example: + this requires that the event body will behave like a dict, 
example: event: {"data": {"a": 5, "b": 7}}, input_path="data.b" means the step will receive 7 as input :param result_path: selects the key/path in the event to write the results to diff --git a/mlrun/utils/db.py b/mlrun/utils/db.py index a75e200049..11f1bd0fc8 100644 --- a/mlrun/utils/db.py +++ b/mlrun/utils/db.py @@ -17,6 +17,8 @@ from sqlalchemy.orm import class_mapper +run_time_fmt = "%Y-%m-%dT%H:%M:%S.%fZ" + class BaseModel: def to_dict(self, exclude=None): diff --git a/mlrun/utils/helpers.py b/mlrun/utils/helpers.py index 397f8dddc2..b71101fed6 100644 --- a/mlrun/utils/helpers.py +++ b/mlrun/utils/helpers.py @@ -17,6 +17,7 @@ import inspect import json import os +import pathlib import re import sys import time @@ -39,6 +40,7 @@ from yaml.representer import RepresenterError import mlrun +import mlrun.common.schemas import mlrun.errors import mlrun.utils.version.version from mlrun.errors import err_to_str @@ -148,6 +150,7 @@ def verify_field_regex( patterns, raise_on_failure: bool = True, log_message: str = "Field is malformed. Does not match required pattern", + mode: mlrun.common.schemas.RegexMatchModes = mlrun.common.schemas.RegexMatchModes.all, ) -> bool: for pattern in patterns: if not re.match(pattern, str(field_value)): @@ -158,13 +161,23 @@ def verify_field_regex( field_value=field_value, pattern=pattern, ) - if raise_on_failure: - raise mlrun.errors.MLRunInvalidArgumentError( - f"Field '{field_name}' is malformed. Does not match required pattern: {pattern}" - ) - else: + if mode == mlrun.common.schemas.RegexMatchModes.all: + if raise_on_failure: + raise mlrun.errors.MLRunInvalidArgumentError( + f"Field '{field_name}' is malformed. 
{field_value} does not match required pattern: {pattern}" + ) return False - return True + elif mode == mlrun.common.schemas.RegexMatchModes.any: + return True + if mode == mlrun.common.schemas.RegexMatchModes.all: + return True + elif mode == mlrun.common.schemas.RegexMatchModes.any: + if raise_on_failure: + raise mlrun.errors.MLRunInvalidArgumentError( + f"Field '{field_name}' is malformed. {field_value} does not match any of the" + f" required patterns: {patterns}" + ) + return False def validate_builder_source( @@ -233,6 +246,34 @@ def get_regex_list_as_string(regex_list: List) -> str: return "".join(["(?={regex})".format(regex=regex) for regex in regex_list]) + ".*$" +def is_file_path_invalid(code_path: str, file_path: str) -> bool: + """ + The function checks if the given file_path is a valid path. + If the file_path is a relative path, it is completed by joining it with the code_path. + Otherwise, the file_path is used as is. + Additionally, it checks if the resulting path exists as a file, unless the file_path is a remote URL. + If the file_path has no suffix, it is considered invalid. 
+ + :param code_path: The base directory or code path to search for the file in case of relative file_path + :param file_path: The file path to be validated + :return: True if the file path is invalid, False otherwise + """ + if not file_path: + return True + + if file_path.startswith("./") or ( + "://" not in file_path and os.path.basename(file_path) == file_path + ): + abs_path = os.path.join(code_path, file_path.lstrip("./")) + else: + abs_path = file_path + + return ( + not (os.path.isfile(abs_path) or "://" in file_path) + or not pathlib.Path(file_path).suffix + ) + + def tag_name_regex_as_string() -> str: return get_regex_list_as_string(mlrun.utils.regex.tag_name) @@ -1266,6 +1307,43 @@ def is_legacy_artifact(artifact): return not hasattr(artifact, "metadata") +def format_run(run: dict, with_project=False) -> dict: + fields = [ + "id", + "name", + "status", + "error", + "created_at", + "scheduled_at", + "finished_at", + "description", + ] + + if with_project: + fields.append("project") + + # create a run object that contains all fields, + run = { + key: str(value) if value is not None else value + for key, value in run.items() + if key in fields + } + + # if the time_keys values is from 1970, this indicates that the field has not yet been specified yet, + # and we want to return a None value instead + time_keys = ["scheduled_at", "finished_at", "created_at"] + + for key, value in run.items(): + if ( + key in time_keys + and isinstance(value, (str, datetime)) + and parser.parse(str(value)).year == 1970 + ): + run[key] = None + + return run + + def get_in_artifact(artifact: dict, key, default=None, raise_on_missing=False): """artifact can be dict or Artifact object""" if is_legacy_artifact(artifact): @@ -1302,6 +1380,18 @@ def is_relative_path(path): return not (path.startswith("/") or ":\\" in path or "://" in path) +def is_running_in_jupyter_notebook() -> bool: + """ + Check if the code is running inside a Jupyter Notebook. 
+ :return: True if running inside a Jupyter Notebook, False otherwise. + """ + import IPython + + ipy = IPython.get_ipython() + # if its IPython terminal, it isn't a Jupyter ipython + return ipy and "Terminal" not in str(type(ipy)) + + def as_number(field_name, field_value): if isinstance(field_value, str) and not field_value.isnumeric(): raise ValueError(f"{field_name} must be numeric (str/int types)") diff --git a/mlrun/utils/http.py b/mlrun/utils/http.py index 3a66dd2972..85f27c2d82 100644 --- a/mlrun/utils/http.py +++ b/mlrun/utils/http.py @@ -14,6 +14,7 @@ # import time +import typing import requests import requests.adapters @@ -82,14 +83,15 @@ def __init__( self.retry_on_exception = retry_on_exception self.verbose = verbose self._logger = logger.get_child("http-client") + self._retry_methods = self._resolve_retry_methods(retry_on_post) if retry_on_status: - http_adapter = requests.adapters.HTTPAdapter( + self._http_adapter = requests.adapters.HTTPAdapter( max_retries=urllib3.util.retry.Retry( total=self.max_retries, backoff_factor=self.retry_backoff_factor, status_forcelist=config.http_retry_defaults.status_codes, - method_whitelist=self._get_retry_methods(retry_on_post), + method_whitelist=self._retry_methods, # we want to retry but not to raise since we do want that last response (to parse details on the # error from response body) we'll handle raising ourselves raise_on_status=False, @@ -97,8 +99,8 @@ def __init__( pool_maxsize=int(config.httpdb.max_workers), ) - self.mount("http://", http_adapter) - self.mount("https://", http_adapter) + self.mount("http://", self._http_adapter) + self.mount("https://", self._http_adapter) def request(self, method, url, **kwargs): retry_count = 0 @@ -111,42 +113,7 @@ def request(self, method, url, **kwargs): response = super().request(method, url, **kwargs) return response except Exception as exc: - if not self.retry_on_exception: - self._log_exception( - "warning", - exc, - f"{method} {url} request failed, http retries 
disabled," - f" raising exception: {err_to_str(exc)}", - retry_count, - ) - raise exc - - if retry_count >= self.max_retries: - self._log_exception( - "warning", - exc, - f"{method} {url} request failed, max retries reached," - f" raising exception: {err_to_str(exc)}", - retry_count, - ) - raise exc - - # only retryable exceptions - exception_is_retryable = any( - msg in str(exc) for msg in self.HTTP_RETRYABLE_EXCEPTION_STRINGS - ) or any( - isinstance(exc, retryable_exc) - for retryable_exc in self.HTTP_RETRYABLE_EXCEPTIONS - ) - - if not exception_is_retryable: - self._log_exception( - "warning", - exc, - f"{method} {url} request failed on non-retryable exception," - f" raising exception: {err_to_str(exc)}", - retry_count, - ) + if not self._error_is_retryable(url, method, exc, retry_count): raise exc self._logger.warning( @@ -167,15 +134,68 @@ def request(self, method, url, **kwargs): retry_count += 1 time.sleep(self.retry_backoff_factor) - @staticmethod - def _get_retry_methods(retry_on_post=False): - return ( - # setting to False in order to retry on all methods, otherwise every method except POST. - False - if retry_on_post - else urllib3.util.retry.Retry.DEFAULT_ALLOWED_METHODS + def _error_is_retryable(self, url, method, exc, retry_count): + if not self.retry_on_exception: + self._log_exception( + "warning", + exc, + f"{method} {url} request failed, http retries disabled," + f" raising exception: {err_to_str(exc)}", + retry_count, + ) + return False + + # if the response is not retryable, stop retrying + # this is done to prevent the retry logic from running on non-idempotent methods (such as POST). 
+ if not self._method_retryable(method): + self._log_exception( + "warning", + exc, + f"{method} {url} request failed, http retries disabled for {method} method.", + retry_count, + ) + return False + + if retry_count >= self.max_retries: + self._log_exception( + "warning", + exc, + f"{method} {url} request failed, max retries reached," + f" raising exception: {err_to_str(exc)}", + retry_count, + ) + return False + + # only retryable exceptions + exception_is_retryable = any( + msg in str(exc) for msg in self.HTTP_RETRYABLE_EXCEPTION_STRINGS + ) or any( + isinstance(exc, retryable_exc) + for retryable_exc in self.HTTP_RETRYABLE_EXCEPTIONS ) + if not exception_is_retryable: + self._log_exception( + "warning", + exc, + f"{method} {url} request failed on non-retryable exception," + f" raising exception: {err_to_str(exc)}", + retry_count, + ) + return False + return True + + def _method_retryable(self, method: str): + return method in self._retry_methods + + def _resolve_retry_methods( + self, retry_on_post: bool = False + ) -> typing.FrozenSet[str]: + methods = urllib3.util.retry.Retry.DEFAULT_ALLOWED_METHODS + if retry_on_post: + methods = methods.union({"POST"}) + return frozenset(methods) + def _log_exception(self, level, exc, message, retry_count): getattr(self._logger, level)( message, diff --git a/mlrun/utils/notifications/notification/git.py b/mlrun/utils/notifications/notification/git.py index 5ab1c3ca4e..e9f4f8e088 100644 --- a/mlrun/utils/notifications/notification/git.py +++ b/mlrun/utils/notifications/notification/git.py @@ -114,11 +114,11 @@ async def _pr_comment( with open(os.environ["GITHUB_EVENT_PATH"]) as fp: data = fp.read() event = json.loads(data) - if "issue" not in event: + if "number" not in event: raise mlrun.errors.MLRunInvalidArgumentError( f"issue not found in github actions event\ndata={data}" ) - issue = event["issue"].get("number") + issue = event["number"] headers = { "Accept": "application/vnd.github.v3+json", "Authorization": f"token 
{token}", diff --git a/mlrun/utils/notifications/notification/ipython.py b/mlrun/utils/notifications/notification/ipython.py index 7079a30275..0c6ea383d2 100644 --- a/mlrun/utils/notifications/notification/ipython.py +++ b/mlrun/utils/notifications/notification/ipython.py @@ -36,9 +36,7 @@ def __init__( try: import IPython - ipy = IPython.get_ipython() - # if its IPython terminal ignore (can't show html) - if ipy and "Terminal" not in str(type(ipy)): + if mlrun.utils.helpers.is_running_in_jupyter_notebook(): self._ipython = IPython except ImportError: pass diff --git a/mlrun/utils/notifications/notification_pusher.py b/mlrun/utils/notifications/notification_pusher.py index 2dbfaa2210..898b381821 100644 --- a/mlrun/utils/notifications/notification_pusher.py +++ b/mlrun/utils/notifications/notification_pusher.py @@ -16,6 +16,7 @@ import datetime import os import typing +from concurrent.futures import ThreadPoolExecutor from fastapi.concurrency import run_in_threadpool @@ -107,16 +108,22 @@ async def _async_push(): # first push async notifications main_event_loop = asyncio.get_event_loop() - if main_event_loop.is_running(): - - # If running from the api or from jupyter notebook, we are already in an event loop. - # We add the async push function to the loop and run it. - asyncio.run_coroutine_threadsafe(_async_push(), main_event_loop) - else: - + if not main_event_loop.is_running(): # If running mlrun SDK locally (not from jupyter), there isn't necessarily an event loop. # We create a new event loop and run the async push function in it. main_event_loop.run_until_complete(_async_push()) + elif mlrun.utils.helpers.is_running_in_jupyter_notebook(): + # Running in Jupyter notebook. + # In this case, we need to create a new thread, run a separate event loop in + # that thread, and use it instead of the main_event_loop. + # This is necessary because Jupyter Notebook has its own event loop, + # but it runs in the main thread. 
As long as a cell is running, + # the event loop will not execute properly + _run_coroutine_in_jupyter_notebook(coroutine_method=_async_push) + else: + # Running in mlrun api, we are in a separate thread from the one in which + # the main event loop, so we can just send the notifications to that loop + asyncio.run_coroutine_threadsafe(_async_push(), main_event_loop) # then push sync notifications if not mlrun.config.is_running_as_api(): @@ -328,16 +335,27 @@ async def _async_push(): tasks.append( notification.push(message, severity, runs, custom_html) ) - # return exceptions to "best-effort" fire all notifications await asyncio.gather(*tasks, return_exceptions=True) # first push async notifications main_event_loop = asyncio.get_event_loop() - if main_event_loop.is_running(): - asyncio.run_coroutine_threadsafe(_async_push(), main_event_loop) - else: + if not main_event_loop.is_running(): + # If running mlrun SDK locally (not from jupyter), there isn't necessarily an event loop. + # We create a new event loop and run the async push function in it. main_event_loop.run_until_complete(_async_push()) + elif mlrun.utils.helpers.is_running_in_jupyter_notebook(): + # Running in Jupyter notebook. + # In this case, we need to create a new thread, run a separate event loop in + # that thread, and use it instead of the main_event_loop. + # This is necessary because Jupyter Notebook has its own event loop, + # but it runs in the main thread. 
As long as a cell is running, + # the event loop will not execute properly + _run_coroutine_in_jupyter_notebook(coroutine_method=_async_push) + else: + # Running in mlrun api, we are in a separate thread from the one in which + # the main event loop, so we can just send the notifications to that loop + asyncio.run_coroutine_threadsafe(_async_push(), main_event_loop) # then push sync notifications if not mlrun.config.is_running_as_api(): @@ -352,13 +370,29 @@ def add_notification( self._sync_notifications[notification_type].load_notification(params) else: notification = NotificationTypes(notification_type).get_notification()( - params + params=params, ) if notification.is_async: self._async_notifications[notification_type] = notification else: self._sync_notifications[notification_type] = notification + def remove_notification(self, notification_type: str): + if notification_type in self._async_notifications: + del self._async_notifications[notification_type] + + elif notification_type in self._sync_notifications: + del self._sync_notifications[notification_type] + + else: + logger.warning(f"No notification of type {notification_type} in project") + + def edit_notification( + self, notification_type: str, params: typing.Dict[str, str] = None + ): + self.remove_notification(notification_type) + self.add_notification(notification_type, params) + def should_push_notification(self, notification_type): notifications = {} notifications.update(self._sync_notifications) @@ -389,7 +423,7 @@ def push_pipeline_start_message( pipeline_id: str = None, has_workflow_url: bool = False, ): - message = f"Pipeline started in project {project}" + message = f"Workflow started in project {project}" if pipeline_id: message += f" id={pipeline_id}" commit_id = ( @@ -458,3 +492,26 @@ def _separate_sync_notifications( else: sync_notifications.append(notification) return sync_notifications, async_notifications + + +def _run_coroutine_in_jupyter_notebook(coroutine_method): + """ + Execute a 
coroutine in a Jupyter Notebook environment. + + This function creates a new thread pool executor with a single thread and a new event loop. + It sets the created event loop as the current event loop. + Then, it submits the coroutine to the event loop and waits for its completion. + + This approach is used in Jupyter Notebook to ensure the proper execution of the event loop in a separate thread, + allowing for the asynchronous push operation to be executed while the notebook is running. + + :param coroutine_method: The coroutine method to be executed. + :return: The result of the executed coroutine. + """ + thread_pool_executer = ThreadPoolExecutor(1) + async_event_loop = asyncio.new_event_loop() + thread_pool_executer.submit(asyncio.set_event_loop, async_event_loop).result() + result = thread_pool_executer.submit( + async_event_loop.run_until_complete, coroutine_method() + ).result() + return result diff --git a/mlrun/utils/regex.py b/mlrun/utils/regex.py index 38e0fee295..8c7752ed64 100644 --- a/mlrun/utils/regex.py +++ b/mlrun/utils/regex.py @@ -13,6 +13,11 @@ # limitations under the License. # +# pipeline param format which is passed when running a pipeline (e.g. 
{{pipelineparam:op=;name=mem}}) +# https://github.com/kubeflow/pipelines/blob/16edebf4eaf84cd7478e2601ef4878ab339a7854/sdk/python/kfp/dsl/_pipeline_param.py#L213 +# this is expected to be resolved at runtime +pipeline_param = [r"{{pipelineparam:op=([\w\s_-]*);name=([\w\s_-]+)}}"] + # k8s character limit is for 63 characters k8s_character_limit = [r"^.{0,63}$"] diff --git a/requirements.txt b/requirements.txt index 115084a1bc..7b45cc8b5d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,7 +16,7 @@ nest-asyncio~=1.0 # ipython 8.0 + only supports python3.8 +, so to keep backwards compatibility with python 3.7 we support 7.x # we rely on pip and nuclio-jupyter requirements to install the right package per python version ipython>=7.0, <9.0 -nuclio-jupyter~=0.9.10 +nuclio-jupyter~=0.9.11 # >=1.16.5 from pandas 1.2.1 and <1.23.0 from storey numpy>=1.16.5, <1.23.0 # limiting pandas to <1.5.0 since 1.5.0 causes exception in storey on casting from ns to us @@ -31,8 +31,10 @@ requests~=2.22 sqlalchemy~=1.4 # >=0.8.6 from kfp 1.6.0 (and still up until 1.8.10) tabulate~=0.8.6 -v3io~=0.5.20 -pydantic~=1.5 +v3io~=0.5.21 +# pydantic 1.10.8 fixes a bug with literal and typing-extension 4.6.0 +# https://docs.pydantic.dev/latest/changelog/#v1108-2023-05-23 +pydantic~=1.10, >=1.10.8 # blacklist 3.8.12 due to a bug not being able to collect traceback of exceptions orjson~=3.3, <3.8.12 alembic~=1.9 @@ -49,7 +51,7 @@ humanfriendly~=9.2 fastapi~=0.95.2 fsspec~=2023.1.0 v3iofs~=0.1.15 -storey~=1.4.3 +storey~=1.4.4 deepdiff~=5.0 pymysql~=1.0 inflection~=0.5.0 diff --git a/setup.py b/setup.py index d4f5d48e6f..481d4de333 100644 --- a/setup.py +++ b/setup.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ try: from setuptools import setup except ImportError: @@ -18,6 +19,7 @@ import json import logging +import re import dependencies import packages @@ -30,7 +32,9 @@ def version(): try: with open("mlrun/utils/version/version.json") as version_file: version_metadata = json.load(version_file) - return version_metadata["version"] + version_ = version_metadata["version"] + # replace "1.4.0-rc1+rca" with "1.4.0rc1+rca" + return re.sub(r"(\d+\.\d+\.\d+)-rc(\d+)", r"\1rc\2", version_) except (ValueError, KeyError, FileNotFoundError): # When installing un-released version (e.g. by doing # pip install git+https://github.com/mlrun/mlrun@development) @@ -72,6 +76,7 @@ def version(): "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", "Programming Language :: Python", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Software Development :: Libraries", diff --git a/tests/api/api/feature_store/base.py b/tests/api/api/feature_store/base.py index 77318dabb4..dcdda0d8ce 100644 --- a/tests/api/api/feature_store/base.py +++ b/tests/api/api/feature_store/base.py @@ -83,11 +83,17 @@ def _patch_object( # There will be fields added (uid for example), but we don't allow any other changes def _assert_diff_as_expected_except_for_specific_metadata( - expected_object, actual_object, allowed_metadata_fields, expected_diff={} + expected_object, + actual_object, + allowed_metadata_fields, + expected_diff={}, + allowed_spec_fields=[], ): exclude_paths = [] for field in allowed_metadata_fields: exclude_paths.append(f"root['metadata']['{field}']") + for field in allowed_spec_fields: + exclude_paths.append(f"root['spec']['{field}']") diff = DeepDiff( expected_object, actual_object, diff --git a/tests/api/api/feature_store/test_feature_sets.py b/tests/api/api/feature_store/test_feature_sets.py index ddbccdc359..66c65fa713 100644 --- 
a/tests/api/api/feature_store/test_feature_sets.py +++ b/tests/api/api/feature_store/test_feature_sets.py @@ -778,7 +778,10 @@ def test_unversioned_feature_set_actions(db: Session, client: TestClient) -> Non allowed_added_fields = ["created", "updated", "tag", "uid", "project"] _assert_diff_as_expected_except_for_specific_metadata( - feature_set, feature_set_response, allowed_added_fields + feature_set, + feature_set_response, + allowed_added_fields, + allowed_spec_fields=["engine"], ) assert feature_set_response["metadata"]["uid"] is None @@ -805,7 +808,10 @@ def test_unversioned_feature_set_actions(db: Session, client: TestClient) -> Non ) _assert_diff_as_expected_except_for_specific_metadata( - feature_set, feature_set_response, allowed_added_fields + feature_set, + feature_set_response, + allowed_added_fields, + allowed_spec_fields=["engine"], ) assert feature_set_response["metadata"]["uid"] is None diff --git a/tests/api/api/test_frontend_spec.py b/tests/api/api/test_frontend_spec.py index e5376168ae..c42924dd5a 100644 --- a/tests/api/api/test_frontend_spec.py +++ b/tests/api/api/test_frontend_spec.py @@ -22,6 +22,7 @@ import mlrun.api.crud import mlrun.api.utils.builder import mlrun.api.utils.clients.iguazio +import mlrun.api.utils.runtimes.nuclio import mlrun.common.schemas import mlrun.errors import mlrun.runtimes @@ -186,7 +187,7 @@ def test_get_frontend_spec_nuclio_streams( }, ]: # init cached value to None in the beginning of each test case - mlrun.runtimes.utils.cached_nuclio_version = None + mlrun.api.utils.runtimes.nuclio.cached_nuclio_version = None mlrun.mlconf.igz_version = test_case.get("iguazio_version") mlrun.mlconf.nuclio_version = test_case.get("nuclio_version") diff --git a/tests/api/api/test_pipelines.py b/tests/api/api/test_pipelines.py index a0e31163ce..cde13995e7 100644 --- a/tests/api/api/test_pipelines.py +++ b/tests/api/api/test_pipelines.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # 
limitations under the License. # +import datetime import http import importlib import json @@ -162,6 +163,46 @@ def test_get_pipeline_specific_project( importlib.reload(mlrun.api.crud) +def test_list_pipelines_time_fields_default( + db: sqlalchemy.orm.Session, + client: fastapi.testclient.TestClient, + kfp_client_mock: kfp.Client, +) -> None: + created_at = datetime.datetime.now() + workflow_manifest = _generate_workflow_manifest() + runs = [ + kfp_server_api.models.api_run.ApiRun( + id="id1", + name="run", + description="desc", + created_at=created_at, + finished_at="1970-01-01 00:00:00+00:00", + scheduled_at="1970-01-01 00:00:00+00:00", + pipeline_spec=kfp_server_api.models.api_pipeline_spec.ApiPipelineSpec( + pipeline_id="pipe_id", + workflow_manifest=workflow_manifest, + ), + ) + ] + + _mock_list_runs(kfp_client_mock, runs) + response = client.get( + "projects/*/pipelines", + params={"format": mlrun.common.schemas.PipelinesFormat.metadata_only}, + ) + response = response.json()["runs"][0] + + assert response["created_at"] == str(created_at) + assert not response["finished_at"], ( + "Expected value to be None after format," + " since field has not been specified yet" + ) + assert not response["scheduled_at"], ( + "Expected value to be None after format," + " since field has not been specified yet" + ) + + def test_list_pipelines_specific_project( db: sqlalchemy.orm.Session, client: fastapi.testclient.TestClient, diff --git a/tests/api/conftest.py b/tests/api/conftest.py index 4c6af3541a..a2e68278a5 100644 --- a/tests/api/conftest.py +++ b/tests/api/conftest.py @@ -24,19 +24,33 @@ from fastapi.testclient import TestClient import mlrun.api.utils.clients.iguazio +import mlrun.api.utils.runtimes.nuclio +import mlrun.api.utils.singletons.db import mlrun.api.utils.singletons.k8s +import mlrun.api.utils.singletons.logs_dir +import mlrun.api.utils.singletons.project_member +import mlrun.api.utils.singletons.scheduler import mlrun.common.schemas from mlrun import mlconf 
from mlrun.api.db.sqldb.session import _init_engine, create_session from mlrun.api.initial_data import init_data from mlrun.api.main import BASE_VERSIONED_API_PREFIX, app -from mlrun.api.utils.singletons.db import initialize_db -from mlrun.api.utils.singletons.project_member import initialize_project_member from mlrun.config import config from mlrun.secrets import SecretsStore from mlrun.utils import logger +@pytest.fixture(autouse=True) +def api_config_test(): + mlrun.api.utils.singletons.db.db = None + mlrun.api.utils.singletons.project_member.project_member = None + mlrun.api.utils.singletons.scheduler.scheduler = None + mlrun.api.utils.singletons.k8s._k8s = None + mlrun.api.utils.singletons.logs_dir.logs_dir = None + + mlrun.api.utils.runtimes.nuclio.cached_nuclio_version = None + + @pytest.fixture() def db() -> Generator: """ @@ -56,8 +70,8 @@ def db() -> Generator: # forcing from scratch because we created an empty file for the db init_data(from_scratch=True) - initialize_db() - initialize_project_member() + mlrun.api.utils.singletons.db.initialize_db() + mlrun.api.utils.singletons.project_member.initialize_project_member() # we're also running client code in tests so set dbpath as well # note that setting this attribute triggers connection to the run db therefore must happen after the initialization diff --git a/tests/api/runtimes/test_nuclio.py b/tests/api/runtimes/test_nuclio.py index 497f1de971..2437756754 100644 --- a/tests/api/runtimes/test_nuclio.py +++ b/tests/api/runtimes/test_nuclio.py @@ -30,6 +30,7 @@ import mlrun.api.crud.runtimes.nuclio.function import mlrun.api.crud.runtimes.nuclio.helpers +import mlrun.api.utils.runtimes.nuclio import mlrun.common.schemas import mlrun.errors import mlrun.runtimes.function @@ -981,7 +982,7 @@ def test_deploy_python_decode_string_env_var_enrichment( ) function = self._generate_runtime(self.runtime_kind) function.spec.nuclio_runtime = "python:3.7" - mlrun.runtimes.utils.cached_nuclio_version = "1.5.13" + 
mlrun.api.utils.runtimes.nuclio.cached_nuclio_version = "1.5.13" with pytest.raises( mlrun.errors.MLRunInvalidArgumentError, match=r"(.*)Nuclio version does not support(.*)", @@ -1002,7 +1003,7 @@ def test_deploy_python_decode_string_env_var_enrichment( logger.info("Function runtime is python, but nuclio is >=1.8.0 - do nothing") self._reset_mock() - mlrun.runtimes.utils.cached_nuclio_version = "1.8.5" + mlrun.api.utils.runtimes.nuclio.cached_nuclio_version = "1.8.5" function = self._generate_runtime(self.runtime_kind) self.execute_function(function) self._assert_deploy_called_basic_config( @@ -1015,7 +1016,7 @@ def test_deploy_python_decode_string_env_var_enrichment( "Function runtime is python, nuclio version in range, but already has the env var set - do nothing" ) self._reset_mock() - mlrun.runtimes.utils.cached_nuclio_version = "1.7.5" + mlrun.api.utils.runtimes.nuclio.cached_nuclio_version = "1.7.5" function = self._generate_runtime(self.runtime_kind) function.set_env(decode_event_strings_env_var_name, "false") self.execute_function(function) @@ -1029,7 +1030,7 @@ def test_deploy_python_decode_string_env_var_enrichment( "Function runtime is python, nuclio version in range, env var not set - add it" ) self._reset_mock() - mlrun.runtimes.utils.cached_nuclio_version = "1.7.5" + mlrun.api.utils.runtimes.nuclio.cached_nuclio_version = "1.7.5" function = self._generate_runtime(self.runtime_kind) self.execute_function(function) self._assert_deploy_called_basic_config( @@ -1039,7 +1040,7 @@ def test_deploy_python_decode_string_env_var_enrichment( ) def test_is_nuclio_version_in_range(self): - mlrun.runtimes.utils.cached_nuclio_version = "1.7.2" + mlrun.api.utils.runtimes.nuclio.cached_nuclio_version = "1.7.2" assert not mlrun.api.crud.runtimes.nuclio.helpers.is_nuclio_version_in_range( "1.6.11", "1.7.2" @@ -1067,7 +1068,7 @@ def test_is_nuclio_version_in_range(self): ) # best effort - assumes compatibility - mlrun.runtimes.utils.cached_nuclio_version = "" + 
mlrun.api.utils.runtimes.nuclio.cached_nuclio_version = "" assert mlrun.api.crud.runtimes.nuclio.helpers.is_nuclio_version_in_range( "1.5.5", "2.3.4" ) @@ -1572,6 +1573,20 @@ def test_deploy_with_service_type( ) assert ingresses[0]["hostTemplate"] == expected_ingress_host_template + def test_deploy_with_readiness_timeout_params( + self, db: Session, client: TestClient + ): + function = self._generate_runtime(self.runtime_kind) + function.spec.readiness_timeout = 501 + function.spec.readiness_timeout_before_failure = True + + self.execute_function(function) + args, _ = nuclio.deploy.deploy_config.call_args + deploy_spec = args[0]["spec"] + + assert deploy_spec["readinessTimeoutSeconds"] == 501 + assert deploy_spec["waitReadinessTimeoutBeforeFailure"] + # Kind of "nuclio:mlrun" is a special case of nuclio functions. Run the same suite of tests here as well class TestNuclioMLRunRuntime(TestNuclioRuntime): diff --git a/tests/artifacts/test_artifacts.py b/tests/artifacts/test_artifacts.py index 0aef748ab5..6aabd3fcd9 100644 --- a/tests/artifacts/test_artifacts.py +++ b/tests/artifacts/test_artifacts.py @@ -16,6 +16,7 @@ import pathlib import typing import unittest.mock +import uuid from contextlib import nullcontext as does_not_raise import pytest @@ -90,55 +91,61 @@ def __init__(self, name="", kind="run"): self.name = name -def test_generate_target_path(): - Artifact = mlrun.artifacts.Artifact - Model = mlrun.artifacts.ModelArtifact - cases = [ - # artifact_path, artifact, src_path, iter, producer, expected - ("x", Artifact("k1"), None, FakeProducer("j1"), "x/j1/0/k1"), +@pytest.mark.parametrize( + "artifact_path,artifact,iter,producer,expected", + [ + ("x", mlrun.artifacts.Artifact("k1"), None, FakeProducer("j1"), "x/j1/0/k1"), ( None, - Artifact("k2", format="html"), + mlrun.artifacts.Artifact("k2", format="html"), 1, FakeProducer("j1"), "j1/1/k2.html", ), ( "", - Artifact("k3", src_path="model.pkl"), + mlrun.artifacts.Artifact("k3", src_path="model.pkl"), 0, 
FakeProducer("j1"), "j1/0/k3.pkl", ), ( "x", - Artifact("k4", src_path="a.b"), + mlrun.artifacts.Artifact("k4", src_path="a.b"), None, FakeProducer(kind="project"), "x/k4.b", ), ( "", - Model("k5", model_dir="y", model_file="model.pkl"), + mlrun.artifacts.ModelArtifact("k5", model_dir="y", model_file="model.pkl"), 0, FakeProducer("j1"), "j1/0/k5/", ), ( "x", - Model("k6", model_file="a.b"), + mlrun.artifacts.ModelArtifact("k6", model_file="a.b"), None, FakeProducer(kind="project"), "x/k6/", ), - ] - for artifact_path, artifact, iter, producer, expected in cases: - artifact.iter = iter - target = mlrun.artifacts.base.generate_target_path( - artifact, artifact_path, producer - ) - print(f"\ntarget: {target}\nexpected: {expected}") - assert target == expected + ( + "", + mlrun.artifacts.Artifact("k7", src_path="a.tar.gz"), + None, + FakeProducer(kind="project"), + "k7.tar.gz", + ), + ], +) +def test_generate_target_path(artifact_path, artifact, iter, producer, expected): + artifact.iter = iter + target = mlrun.artifacts.base.generate_target_path( + artifact, artifact_path, producer + ) + print(f"\ntarget: {target}\nexpected: {expected}") + assert target == expected def assets_path(): @@ -516,3 +523,22 @@ def test_tag_not_in_model_spec(): assert "tag" not in model_spec, "tag should not be in model spec" assert "tag" not in model_spec["metadata"], "tag should not be in metadata" + + +def test_register_artifacts(rundb_mock): + project_name = "my-projects" + project = mlrun.new_project(project_name) + artifact_key = "my-art" + artifact_tag = "v1" + project.set_artifact( + artifact_key, + artifact=mlrun.artifacts.Artifact(key=artifact_key, body=b"x=1"), + tag=artifact_tag, + ) + + expected_tree = "my_uuid" + with unittest.mock.patch.object(uuid, "uuid4", return_value=expected_tree): + project.register_artifacts() + + artifact = project.get_artifact(artifact_key) + assert artifact.tree == expected_tree diff --git a/tests/assets/log_function.py b/tests/assets/log_function.py 
new file mode 100644 index 0000000000..6b53af4320 --- /dev/null +++ b/tests/assets/log_function.py @@ -0,0 +1,39 @@ +# Copyright 2023 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import pandas as pd + +features = { + "feature_1": [ + {"a": 4, "b": 8, "c": 5}, + {"a": 5, "b": 6, "c": 3}, + {"a": 3, "b": 10, "c": 2}, + ], + "feature_2": [ + {"a": 3, "b": 2, "c": 10}, + {"a": 9, "b": 10, "c": 9}, + {"a": 4, "b": 9, "c": 2}, + {"a": 3, "b": 6, "c": 4}, + ], +} + + +def log_dataset(context): + for dataset_name, dataset_content in (features or {}).items(): + df = pd.DataFrame(dataset_content) + context.log_dataset( + dataset_name, + df=df, + format="csv", + ) diff --git a/tests/automation/version/__init__.py b/tests/automation/version/__init__.py new file mode 100644 index 0000000000..33c5b3d3bd --- /dev/null +++ b/tests/automation/version/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# diff --git a/tests/automation/version/test_version_file.py b/tests/automation/version/test_version_file.py new file mode 100644 index 0000000000..551ec2468b --- /dev/null +++ b/tests/automation/version/test_version_file.py @@ -0,0 +1,195 @@ +# Copyright 2023 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import json +import os +import subprocess + +import packaging.version +import pytest + +from automation.version.version_file import ( + create_or_update_version_file, + get_current_version, + is_stable_version, + resolve_next_version, +) + + +@pytest.fixture +def git_repo(tmpdir, request): + # change working directory to tmpdir + os.chdir(tmpdir) + + # set up git repository + subprocess.run(["git", "init"]) + if hasattr(request, "param"): + subprocess.run(["git", "checkout", "-b", request.param["branch"]]) + + # add commits + for i in range(5): + with open(f"file{i}.txt", "w") as f: + f.write(f"test {i}\n") + subprocess.run(["git", "add", f"file{i}.txt"]) + subprocess.run(["git", "commit", "-m", f"test commit {i}"]) + + return tmpdir + + +# tags structure: +# list of tuples, where each tuple is (commit order (0-index, where 0 is latest), tag name) +@pytest.mark.parametrize( + "base_version,tags,expected_current_version", + [ + # no tags were made, default to base_version + ("1.5.0", [], "1.5.0"), + # tags were made, but none of them are similar to base_version, use latest greatest (< base version) + ( + "1.5.0", + [ + (0, "1.4.0"), + (1, "1.3.0"), + ], 
+ "1.4.0", + ), + # tags were made, but none of them are similar to base_version, use latest greatest (> base version) + ( + "1.5.0", + [ + (1, "1.6.0"), + (2, "1.4.0"), + ], + "1.6.0", + ), + # tags were made, similar to base_version, use latest greatest + ( + "1.5.0", + [ + (1, "1.5.0"), + ], + "1.5.0", + ), + # tags were made, similar to base_version, use latest greatest + ( + "1.5.0", + [ + (1, "1.5.1"), + ], + "1.5.1", + ), + ], +) +def test_current_version(git_repo, base_version, tags, expected_current_version): + for tag in tags: + subprocess.run( + [ + "git", + "tag", + "-a", + "-m", + f"test tag {tag[1]}", + f"v{tag[1]}", + f"HEAD~{tag[0]}", + ] + ) + current_version = get_current_version(base_version=packaging.version.parse("1.5.0")) + assert current_version == expected_current_version + + +@pytest.mark.parametrize( + "bump_type,current_version,base_version,feature_name,expected_next_version", + [ + # current version is olden than current base version, + # the next expected version is derived from the base version + ("rc", "1.0.0", "1.1.0", None, "1.1.0-rc1"), + ("rc-grad", "1.0.0", "1.1.0", None, "1.1.0"), + ("patch", "1.0.0", "1.1.0", None, "1.1.1"), + ("minor", "1.0.0", "1.1.0", None, "1.2.0"), + ("major", "1.0.0", "1.1.0", None, "2.0.0"), + # current+base tagged + ("rc", "1.0.0", "1.0.0", None, "1.0.1-rc1"), + ("rc", "1.0.0", "1.0.0", "ft-test", "1.0.1-rc1+ft-test"), + ("rc", "1.0.0-rc1", "1.0.0", None, "1.0.0-rc2"), + ("rc", "1.0.0-rc1", "1.0.0", "ft-test", "1.0.0-rc2+ft-test"), + ("rc-grad", "1.0.0-rc1", "1.0.0", None, "1.0.0"), + ("rc-grad", "1.0.0-rc1", "1.0.0", "ft-test", "1.0.0-rc1+ft-test"), + ("patch", "1.0.0", "1.0.0", None, "1.0.1"), + ("patch", "1.0.0", "1.0.0", "ft-test", "1.0.1-rc1+ft-test"), + ("patch", "1.0.0-rc1", "1.0.0", None, "1.0.1"), + ("patch", "1.0.0-rc1", "1.0.0", "ft-test", "1.0.1-rc1+ft-test"), + ("minor", "1.0.0", "1.0.0", None, "1.1.0"), + ("minor", "1.0.0", "1.0.0", "ft-test", "1.1.0-rc1+ft-test"), + ("minor", "1.0.0-rc1", 
"1.0.0", None, "1.1.0"), + ("minor", "1.0.0-rc1", "1.0.0", "ft-test", "1.1.0-rc1+ft-test"), + ("major", "1.0.0", "1.0.0", None, "2.0.0"), + ("major", "1.0.0", "1.0.0", "ft-test", "2.0.0-rc1+ft-test"), + ("major", "1.0.0-rc1", "1.0.0", None, "2.0.0"), + ("major", "1.0.0-rc1", "1.0.0", "ft-test", "2.0.0-rc1+ft-test"), + ], +) +def test_next_version( + bump_type, current_version, base_version, feature_name, expected_next_version +): + next_version = resolve_next_version( + bump_type, + packaging.version.parse(current_version), + packaging.version.parse(base_version), + feature_name, + ) + assert ( + next_version == expected_next_version + ), f"expected {expected_next_version}, got {next_version}" + + +@pytest.mark.parametrize( + "git_repo,base_version,expected_version", + [ + ( + {"branch": "development"}, + "1.5.0", + "1.5.0", + ), + ( + # fills feature from branch + {"branch": "feature/something"}, + "1.5.0", + "1.5.0+something", + ), + ], + indirect=["git_repo"], +) +def test_create_or_update_version_file(git_repo, base_version, expected_version): + latest_commit_hash = subprocess.run( + ["git", "rev-parse", "HEAD"], stdout=subprocess.PIPE + ) + create_or_update_version_file(base_version, git_repo / "version.json") + with open(git_repo / "version.json") as f: + version = json.loads(f.read()) + assert version == { + "version": expected_version, + "git_commit": latest_commit_hash.stdout.strip().decode(), + } + + +@pytest.mark.parametrize( + "version,expected_is_stable", + [ + ("1.0.0", True), + ("1.0.0-rc1", False), + ("1.0.0+unstable", False), + ("1.0.0-rc1+ft-test", False), + ], +) +def test_is_stable_version(version: str, expected_is_stable: bool): + assert is_stable_version(version) is expected_is_stable diff --git a/tests/common_fixtures.py b/tests/common_fixtures.py index 6ba6452cfb..a0abe889f8 100644 --- a/tests/common_fixtures.py +++ b/tests/common_fixtures.py @@ -12,12 +12,15 @@ # See the License for the specific language governing permissions and # 
limitations under the License. # +import inspect +import os import shutil import unittest +from datetime import datetime from http import HTTPStatus from os import environ from pathlib import Path -from typing import Callable, Generator +from typing import Callable, List, Optional, Union from unittest.mock import Mock import deepdiff @@ -26,11 +29,6 @@ import v3io.dataplane from aioresponses import aioresponses as aioresponses_ -import mlrun.api.utils.singletons.db -import mlrun.api.utils.singletons.k8s -import mlrun.api.utils.singletons.logs_dir -import mlrun.api.utils.singletons.project_member -import mlrun.api.utils.singletons.scheduler import mlrun.config import mlrun.datastore import mlrun.db @@ -38,10 +36,6 @@ import mlrun.projects.project import mlrun.utils import mlrun.utils.singleton -from mlrun.api.db.sqldb.db import SQLDB -from mlrun.api.db.sqldb.session import _init_engine, create_session -from mlrun.api.initial_data import init_data -from mlrun.api.utils.singletons.db import initialize_db from mlrun.config import config from mlrun.lists import ArtifactList from mlrun.runtimes import BaseRuntime @@ -90,15 +84,8 @@ def config_test_base(): # remove singletons in case they were changed (we don't want changes to pass between tests) mlrun.utils.singleton.Singleton._instances = {} - mlrun.api.utils.singletons.db.db = None - mlrun.api.utils.singletons.project_member.project_member = None - mlrun.api.utils.singletons.scheduler.scheduler = None - mlrun.api.utils.singletons.k8s._k8s = None - mlrun.api.utils.singletons.logs_dir.logs_dir = None - mlrun.runtimes.runtime_handler_instances_cache = {} mlrun.runtimes.utils.cached_mpijob_crd_version = None - mlrun.runtimes.utils.cached_nuclio_version = None # TODO: update this to "sidecar" once the default mode is changed mlrun.config.config.log_collector.mode = "legacy" @@ -117,45 +104,11 @@ def aioresponses_mock(): yield aior -@pytest.fixture -def db(): - global session_maker - dsn = 
"sqlite:///:memory:?check_same_thread=false" - db_session = None - try: - config.httpdb.dsn = dsn - _init_engine(dsn=dsn) - init_data() - initialize_db() - db_session = create_session() - db = SQLDB(dsn) - db.initialize(db_session) - config.dbpath = dsn - finally: - if db_session is not None: - db_session.close() - mlrun.api.utils.singletons.db.initialize_db(db) - mlrun.api.utils.singletons.logs_dir.initialize_logs_dir() - mlrun.api.utils.singletons.project_member.initialize_project_member() - return db - - @pytest.fixture def ensure_default_project() -> mlrun.projects.project.MlrunProject: return mlrun.get_or_create_project("default") -@pytest.fixture() -def db_session() -> Generator: - db_session = None - try: - db_session = create_session() - yield db_session - finally: - if db_session is not None: - db_session.close() - - @pytest.fixture() def running_as_api(): old_is_running_as_api = mlrun.config.is_running_as_api @@ -164,6 +117,27 @@ def running_as_api(): mlrun.config.is_running_as_api = old_is_running_as_api +@pytest.fixture() +def chdir_to_test_location(request): + """ + Fixture to change the working directory for tests, + It allows seamless access to files relative to the test file. 
+ + Because the working directory inside the dockerized test is '/mlrun', + this fixture allows to automatically modify the cwd to the test file directory, + to ensure the workflow files are located, + and modify it back after the test case for other tests + + """ + original_working_dir = os.getcwd() + test_file_path = os.path.dirname(inspect.getfile(request.function)) + os.chdir(os.path.dirname(test_file_path)) + + yield + + os.chdir(original_working_dir) + + @pytest.fixture def patch_file_forbidden(monkeypatch): class MockV3ioClient: @@ -275,6 +249,33 @@ def store_run(self, struct, uid, project="", iter=0): def read_run(self, uid, project, iter=0): return self._runs.get(uid, {}) + def list_runs( + self, + name: Optional[str] = None, + uid: Optional[Union[str, List[str]]] = None, + project: Optional[str] = None, + labels: Optional[Union[str, List[str]]] = None, + state: Optional[str] = None, + sort: bool = True, + last: int = 0, + iter: bool = False, + start_time_from: Optional[datetime] = None, + start_time_to: Optional[datetime] = None, + last_update_time_from: Optional[datetime] = None, + last_update_time_to: Optional[datetime] = None, + partition_by: Optional[ + Union[mlrun.common.schemas.RunPartitionByField, str] + ] = None, + rows_per_partition: int = 1, + partition_sort_by: Optional[Union[mlrun.common.schemas.SortField, str]] = None, + partition_order: Union[ + mlrun.common.schemas.OrderType, str + ] = mlrun.common.schemas.OrderType.desc, + max_partitions: int = 0, + with_notifications: bool = False, + ) -> mlrun.lists.RunList: + return mlrun.lists.RunList(self._runs.values()) + def get_function(self, function, project, tag, hash_key=None): if function not in self._functions: raise mlrun.errors.MLRunNotFoundError("Function not found") @@ -303,11 +304,14 @@ def submit_pipeline( return True def store_project(self, name, project): - self._project_name = name + return self.create_project(project) + def create_project(self, project): if isinstance(project, 
dict): project = mlrun.projects.MlrunProject.from_dict(project) self._project = project + self._project_name = project.name + return self._project def get_project(self, name): if self._project_name and name == self._project_name: diff --git a/tests/conftest.py b/tests/conftest.py index 7344abf80f..a3ef648dd2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -23,9 +23,6 @@ from time import monotonic, sleep from urllib.request import URLError, urlopen -from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker - tests_root_directory = Path(__file__).absolute().parent results = tests_root_directory / "test_results" is_ci = "CI" in environ @@ -41,8 +38,7 @@ pytest_plugins = ["tests.common_fixtures"] # import package stuff after setting env vars so it will take effect -from mlrun.api.db.sqldb.db import run_time_fmt # noqa: E402 -from mlrun.api.db.sqldb.models import Base # noqa: E402 +from mlrun.utils.db import run_time_fmt # noqa: E402 def check_docker(): @@ -122,12 +118,6 @@ def wrapper(*args, **kwargs): return wrapper -def init_sqldb(dsn): - engine = create_engine(dsn) - Base.metadata.create_all(bind=engine) - return sessionmaker(bind=engine) - - def exec_mlrun(args, cwd=None, op="run"): cmd = [executable, "-m", "mlrun", op] + args out = run(cmd, stdout=PIPE, stderr=PIPE, cwd=cwd) diff --git a/tests/datastore/test_base.py b/tests/datastore/test_base.py index ed57cda732..37fe6d06d4 100644 --- a/tests/datastore/test_base.py +++ b/tests/datastore/test_base.py @@ -26,6 +26,7 @@ from mlrun.datastore.azure_blob import AzureBlobStore from mlrun.datastore.base import HttpStore from mlrun.datastore.datastore import schema_to_store +from mlrun.datastore.dbfs_store import DBFSStore from mlrun.datastore.filestore import FileStore from mlrun.datastore.google_cloud_storage import GoogleCloudStorageStore from mlrun.datastore.redis import RedisStore @@ -132,6 +133,7 @@ def test_kafka_source_without_attributes(): (["redis", "rediss"], RedisStore, 
does_not_raise()), (["http", "https"], HttpStore, does_not_raise()), (["gcs", "gs"], GoogleCloudStorageStore, does_not_raise()), + (["dbfs"], DBFSStore, does_not_raise()), (["random"], None, pytest.raises(ValueError)), ], ) diff --git a/tests/integration/google_cloud_storage/test_data.csv b/tests/integration/google_cloud_storage/test_data.csv new file mode 100644 index 0000000000..ed8408276f --- /dev/null +++ b/tests/integration/google_cloud_storage/test_data.csv @@ -0,0 +1,4 @@ +Name,Age,City +Alice,30,Los Angeles +Bob,35,Chicago +Jane,28,San Francisco diff --git a/tests/integration/google_cloud_storage/test_data.parquet b/tests/integration/google_cloud_storage/test_data.parquet new file mode 100644 index 0000000000..7160e4069f Binary files /dev/null and b/tests/integration/google_cloud_storage/test_data.parquet differ diff --git a/tests/integration/google_cloud_storage/test_google_cloud_storage.py b/tests/integration/google_cloud_storage/test_google_cloud_storage.py index 00043417bd..49ae43830f 100644 --- a/tests/integration/google_cloud_storage/test_google_cloud_storage.py +++ b/tests/integration/google_cloud_storage/test_google_cloud_storage.py @@ -16,6 +16,7 @@ import random from pathlib import Path +import pandas as pd import pytest import yaml @@ -82,6 +83,18 @@ def _perform_google_cloud_storage_tests(self): upload_data_item.upload(test_filename) response = upload_data_item.get() assert response.decode() == test_string, "Result differs from original test" + upload_parquet_file_path = f"{os.path.dirname(self._blob_url)}/file.parquet" + upload_parquet_data_item = mlrun.run.get_dataitem(upload_parquet_file_path) + test_parquet = here / "test_data.parquet" + upload_parquet_data_item.upload(str(test_parquet)) + response = upload_parquet_data_item.as_df() + assert pd.read_parquet(test_parquet).equals(response) + upload_csv_file_path = f"{os.path.dirname(self._blob_url)}/file.csv" + upload_csv_data_item = mlrun.run.get_dataitem(upload_csv_file_path) + test_csv = 
here / "test_data.csv" + upload_csv_data_item.upload(str(test_csv)) + response = upload_csv_data_item.as_df() + assert pd.read_csv(test_csv).equals(response) def test_using_google_env_variable(self): os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = config["env"].get( diff --git a/tests/integration/sdk_api/httpdb/test_exception_handling.py b/tests/integration/sdk_api/httpdb/test_exception_handling.py index 5112a3f003..b61948d805 100644 --- a/tests/integration/sdk_api/httpdb/test_exception_handling.py +++ b/tests/integration/sdk_api/httpdb/test_exception_handling.py @@ -55,7 +55,7 @@ def test_exception_handling(self): match=rf"400 Client Error: Bad Request for url: http:\/\/(.*)\/{mlrun.get_run_db().get_api_path_prefix()}" r"\/projects: Failed creating project some_p" r"roject details: MLRunInvalidArgumentError\(\"Field \'project\.metadata\.name\' is malformed" - r"\. Does not match required pattern: (.*)\"\)", + rf"\. {invalid_project_name} does not match required pattern: (.*)\"\)", ): mlrun.get_run_db().create_project(project) diff --git a/tests/integration/test_dbfs_store/additional_data.csv b/tests/integration/test_dbfs_store/additional_data.csv new file mode 100644 index 0000000000..1ad5e22d03 --- /dev/null +++ b/tests/integration/test_dbfs_store/additional_data.csv @@ -0,0 +1,3 @@ +Name,Age,City +Emily,28,Paris +Michael,32,Sydney diff --git a/tests/integration/test_dbfs_store/additional_data.parquet b/tests/integration/test_dbfs_store/additional_data.parquet new file mode 100644 index 0000000000..b4a57a7829 Binary files /dev/null and b/tests/integration/test_dbfs_store/additional_data.parquet differ diff --git a/tests/integration/test_dbfs_store/test-dbfs-store.yml b/tests/integration/test_dbfs_store/test-dbfs-store.yml new file mode 100644 index 0000000000..f83629c05f --- /dev/null +++ b/tests/integration/test_dbfs_store/test-dbfs-store.yml @@ -0,0 +1,20 @@ +# Copyright 2023 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you 
may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +env: + # databricks authentication token + DATABRICKS_TOKEN: + # databricks endpoint - for example abc-d1e2345f-a6b2.cloud.databricks.com (this example is from docs.databricks.com + # - it is not for use) + DATABRICKS_HOST: diff --git a/tests/integration/test_dbfs_store/test.txt b/tests/integration/test_dbfs_store/test.txt new file mode 100644 index 0000000000..bec7a6c2cb --- /dev/null +++ b/tests/integration/test_dbfs_store/test.txt @@ -0,0 +1,2 @@ +This is just a test file, meant to test the upload functionality. +Nothing really interesting here. 
diff --git a/tests/integration/test_dbfs_store/test_data.csv b/tests/integration/test_dbfs_store/test_data.csv new file mode 100644 index 0000000000..ed8408276f --- /dev/null +++ b/tests/integration/test_dbfs_store/test_data.csv @@ -0,0 +1,4 @@ +Name,Age,City +Alice,30,Los Angeles +Bob,35,Chicago +Jane,28,San Francisco diff --git a/tests/integration/test_dbfs_store/test_data.json b/tests/integration/test_dbfs_store/test_data.json new file mode 100644 index 0000000000..2bd64bcbe6 --- /dev/null +++ b/tests/integration/test_dbfs_store/test_data.json @@ -0,0 +1 @@ +{"Name":{"0":"Alice","1":"Bob","2":"Jane"},"Age":{"0":30,"1":35,"2":28},"City":{"0":"Los Angeles","1":"Chicago","2":"San Francisco"}} diff --git a/tests/integration/test_dbfs_store/test_data.parquet b/tests/integration/test_dbfs_store/test_data.parquet new file mode 100644 index 0000000000..7160e4069f Binary files /dev/null and b/tests/integration/test_dbfs_store/test_data.parquet differ diff --git a/tests/integration/test_dbfs_store/test_dbfs_store.py b/tests/integration/test_dbfs_store/test_dbfs_store.py new file mode 100644 index 0000000000..2d07612905 --- /dev/null +++ b/tests/integration/test_dbfs_store/test_dbfs_store.py @@ -0,0 +1,291 @@ +# Copyright 2023 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import os +import uuid +from pathlib import Path +from typing import List + +import dask.dataframe as dd +import pandas as pd +import pytest +import yaml +from databricks.sdk import WorkspaceClient + +import mlrun +import mlrun.errors + +PARQUETS_DIR = "/parquets" +CSV_DIR = "/csv" +here = Path(__file__).absolute().parent +config_file_path = here / "test-dbfs-store.yml" +with config_file_path.open() as fp: + config = yaml.safe_load(fp) + +test_file_path = str(here / "test.txt") +json_path = str(here / "test_data.json") +parquet_path = str(here / "test_data.parquet") +additional_parquet_path = str(here / "additional_data.parquet") +csv_path = str(here / "test_data.csv") +additional_csv_path = str(here / "additional_data.csv") +with open(test_file_path, "r") as f: + test_string = f.read() + +MUST_HAVE_VARIABLES = ["DATABRICKS_TOKEN", "DATABRICKS_HOST"] + + +def is_dbfs_configured(): + env_params = config["env"] + for necessary_variable in MUST_HAVE_VARIABLES: + if env_params.get(necessary_variable, None) is None: + return False + return True + + +@pytest.mark.skipif( + not is_dbfs_configured(), + reason="DBFS storage parameters not configured", +) +class TestDBFSStore: + def setup_class(self): + databricks_host = config["env"].get("DATABRICKS_HOST") + env_params = config["env"] + for key, env_param in env_params.items(): + os.environ[key] = env_param + self.test_root_dir = "/test_mlrun_dbfs_objects" + self._dbfs_url = "dbfs://" + databricks_host + self.workspace = WorkspaceClient() + + @pytest.fixture(autouse=True) + def setup_before_each_test(self): + all_paths = [file_info.path for file_info in self.workspace.dbfs.list("/")] + if self.test_root_dir not in all_paths: + self.workspace.dbfs.mkdirs(f"{self.test_root_dir}{PARQUETS_DIR}") + self.workspace.dbfs.mkdirs(f"{self.test_root_dir}{CSV_DIR}") + else: + self.workspace.dbfs.delete(self.test_root_dir, recursive=True) + self.workspace.dbfs.mkdirs(f"{self.test_root_dir}{PARQUETS_DIR}") + 
self.workspace.dbfs.mkdirs(f"{self.test_root_dir}{CSV_DIR}") + + def teardown_class(self): + all_paths_under_test_root = [ + file_info.path for file_info in self.workspace.dbfs.list(self.test_root_dir) + ] + for path in all_paths_under_test_root: + self.workspace.dbfs.delete(path, recursive=True) + + def _get_data_item(self, secrets={}): + object_path = f"{self.test_root_dir}/file_{uuid.uuid4()}.txt" + object_url = f"{self._dbfs_url}{object_path}" + return mlrun.run.get_dataitem(object_url, secrets=secrets), object_url + + @pytest.mark.parametrize("use_secrets_as_parameters", [True, False]) + def test_put_and_get(self, use_secrets_as_parameters): + secrets = {} + if use_secrets_as_parameters: + token = config["env"].get("DATABRICKS_TOKEN", None) + secrets = {"DATABRICKS_TOKEN": token} + os.environ["DATABRICKS_TOKEN"] = "" + try: + data_item, _ = self._get_data_item(secrets=secrets) + data_item.put(test_string) + response = data_item.get() + assert response.decode() == test_string + + response = data_item.get(offset=20) + assert response.decode() == test_string[20:] + + finally: + if use_secrets_as_parameters: + os.environ["DATABRICKS_TOKEN"] = token + + def test_stat(self): + data_item, _ = self._get_data_item() + data_item.put(test_string) + stat = data_item.stat() + assert stat.size == len(test_string) + + def test_list_dir(self): + data_item, object_url = self._get_data_item() + data_item.put(test_string) + dir_dataitem = mlrun.run.get_dataitem( + self._dbfs_url + self.test_root_dir, + ) + dir_list = dir_dataitem.listdir() + assert object_url.split("/")[-1] in dir_list + + def test_upload(self): + data_item, _ = self._get_data_item() + data_item.upload(test_file_path) + response = data_item.get() + assert response.decode() == test_string + + def test_rm(self): + data_item, _ = self._get_data_item() + data_item.upload(test_file_path) + data_item.stat() + data_item.delete() + with pytest.raises(FileNotFoundError) as file_not_found_error: + data_item.stat() + 
assert "No file or directory exists on path" in str(file_not_found_error.value) + + @pytest.mark.parametrize( + "file_extension, local_file_path, reader", + [ + ( + "parquet", + parquet_path, + pd.read_parquet, + ), + ("csv", csv_path, pd.read_csv), + ("json", json_path, pd.read_json), + ], + ) + def test_as_df(self, file_extension: str, local_file_path: str, reader: callable): + source = reader(local_file_path) + upload_file_path = f"{self.test_root_dir}/file_{uuid.uuid4()}.{file_extension}" + upload_data_item = mlrun.run.get_dataitem( + self._dbfs_url + upload_file_path, + ) + upload_data_item.upload(local_file_path) + response = upload_data_item.as_df() + assert source.equals(response) + + @pytest.mark.parametrize( + "file_extension, local_file_path, reader", + [ + ( + "parquet", + parquet_path, + dd.read_parquet, + ), + ("csv", csv_path, dd.read_csv), + ("json", json_path, dd.read_json), + ], + ) + def test_as_df_dd( + self, file_extension: str, local_file_path: str, reader: callable + ): + source = reader(local_file_path) + upload_file_path = f"{self.test_root_dir}/file_{uuid.uuid4()}.{file_extension}" + upload_data_item = mlrun.run.get_dataitem( + self._dbfs_url + upload_file_path, + ) + upload_data_item.upload(local_file_path) + response = upload_data_item.as_df(df_module=dd) + assert dd.assert_eq(source, response) + + def _setup_df_dir( + self, first_file_path, second_file_path, file_extension, directory + ): + uploaded_file_path = ( + f"{self.test_root_dir}{directory}/file_{uuid.uuid4()}.{file_extension}" + ) + uploaded_data_item = mlrun.run.get_dataitem(self._dbfs_url + uploaded_file_path) + uploaded_data_item.upload(first_file_path) + + uploaded_file_path = ( + f"{self.test_root_dir}{directory}/file_{uuid.uuid4()}.{file_extension}" + ) + uploaded_data_item = mlrun.run.get_dataitem(self._dbfs_url + uploaded_file_path) + uploaded_data_item.upload(second_file_path) + return os.path.dirname(uploaded_file_path) + + @pytest.mark.parametrize( + "directory, 
file_format, file_extension, files_paths, reader", + [ + ( + PARQUETS_DIR, + "parquet", + "parquet", + [parquet_path, additional_parquet_path], + pd.read_parquet, + ), + (CSV_DIR, "csv", "csv", [csv_path, additional_csv_path], pd.read_csv), + ], + ) + def test_check_read_df_dir( + self, + directory: str, + file_format: str, + file_extension: str, + files_paths: List[Path], + reader: callable, + ): + first_file_path = files_paths[0] + second_file_path = files_paths[1] + df_dir = self._setup_df_dir( + first_file_path=first_file_path, + second_file_path=second_file_path, + file_extension=file_extension, + directory=directory, + ) + dir_data_item = mlrun.run.get_dataitem(self._dbfs_url + df_dir) + response_df = ( + dir_data_item.as_df(format=file_format) + .sort_values("Name") + .reset_index(drop=True) + ) + df = reader(files_paths[0]) + additional_df = reader(second_file_path) + appended_df = ( + pd.concat([df, additional_df], axis=0) + .sort_values("Name") + .reset_index(drop=True) + ) + assert response_df.equals(appended_df) + + @pytest.mark.parametrize( + "directory, file_format, file_extension, files_paths, reader", + [ + ( + PARQUETS_DIR, + "parquet", + "parquet", + [parquet_path, additional_parquet_path], + dd.read_parquet, + ), + (CSV_DIR, "csv", "csv", [csv_path, additional_csv_path], dd.read_csv), + ], + ) + def test_check_read_df_dir_dd( + self, + directory: str, + file_format: str, + file_extension: str, + files_paths: List[Path], + reader: callable, + ): + first_file_path = files_paths[0] + second_file_path = files_paths[1] + df_dir = self._setup_df_dir( + first_file_path=first_file_path, + second_file_path=second_file_path, + file_extension=file_extension, + directory=directory, + ) + dir_data_item = mlrun.run.get_dataitem(self._dbfs_url + df_dir) + response_df = ( + dir_data_item.as_df(format=file_format, df_module=dd) + .sort_values("Name") + .reset_index(drop=True) + ) + df = reader(first_file_path) + additional_df = reader(second_file_path) + 
appended_df = ( + dd.concat([df, additional_df], axis=0) + .sort_values("Name") + .reset_index(drop=True) + ) + assert dd.assert_eq(appended_df, response_df) diff --git a/tests/projects/assets/load_setup_test/prep_data.py b/tests/projects/assets/load_setup_test/prep_data.py new file mode 100644 index 0000000000..8aafbc08f2 --- /dev/null +++ b/tests/projects/assets/load_setup_test/prep_data.py @@ -0,0 +1,27 @@ +# Copyright 2023 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import mlrun + + +def prep_data(context, source_url: mlrun.DataItem, label_column="label"): + # Convert the DataItem to a pandas DataFrame + df = source_url.as_df() + print("data url:", source_url.url) + df[label_column] = df[label_column].astype("category").cat.codes + + # Record the DataFrame length after the run + context.log_result("num_rows", df.shape[0]) + + # Store the data set in your artifacts database + context.log_dataset("cleaned_data", df=df, index=False, format="csv") diff --git a/tests/projects/assets/load_setup_test/project.yaml b/tests/projects/assets/load_setup_test/project.yaml new file mode 100644 index 0000000000..f076ad4392 --- /dev/null +++ b/tests/projects/assets/load_setup_test/project.yaml @@ -0,0 +1,25 @@ +# Copyright 2023 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +kind: project +metadata: + name: +spec: + description: test + params: + p1: xyz + artifacts: + - kind: '' + target_path: https://s3.wasabisys.com/iguazio/data/iris/iris.data.raw.csv + key: data diff --git a/tests/projects/assets/load_setup_test/project_setup.py b/tests/projects/assets/load_setup_test/project_setup.py new file mode 100644 index 0000000000..fff90dc76e --- /dev/null +++ b/tests/projects/assets/load_setup_test/project_setup.py @@ -0,0 +1,30 @@ +# Copyright 2023 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import mlrun
+
+
+def setup(project: mlrun.projects.MlrunProject):
+    """Example project setup script: sets a param and registers prep-data and serving functions"""
+    project.spec.params["test123"] = "456"
+
+    # job function, labeled "tst1" with the value of the "p2" project param
+    job_kwargs = dict(kind="job", image="mlrun/mlrun")
+    data_prep = project.set_function("prep_data.py", "prep-data", **job_kwargs)
+    data_prep.set_label("tst1", project.get_param("p2"))
+
+    # serving function with a single model "x" backed by class "MyCls"
+    serving_kwargs = dict(kind="serving", image="mlrun/mlrun")
+    serving = project.set_function("serving.py", "serving", **serving_kwargs)
+    serving.add_model("x", ".", class_name="MyCls")
+    return project
diff --git a/tests/projects/assets/load_setup_test/serving.py b/tests/projects/assets/load_setup_test/serving.py
new file mode 100644
index 0000000000..ed8462de66
--- /dev/null
+++ b/tests/projects/assets/load_setup_test/serving.py
@@ -0,0 +1,15 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +# empty py file for test diff --git a/tests/projects/test_project.py b/tests/projects/test_project.py index 1d2570653f..8965693310 100644 --- a/tests/projects/test_project.py +++ b/tests/projects/test_project.py @@ -15,6 +15,7 @@ import os import os.path import pathlib +import re import shutil import tempfile import unittest.mock @@ -338,6 +339,36 @@ def test_load_project( assert os.path.exists(os.path.join(context, project_file)) +def test_load_project_with_setup(context): + # load the project from the "assets/load_setup_test" dir, and init using the project_setup.py in it + project_path = ( + pathlib.Path(tests.conftest.tests_root_directory) + / "projects" + / "assets" + / "load_setup_test" + ) + project = mlrun.load_project( + context=project_path, name="projset", save=False, parameters={"p2": "123"} + ) + mlrun.utils.logger.info(f"Project: {project}") + + # see assets/load_setup_test/project_setup.py for extra project settings + # test that a function was added and its metadata was set from param[p2] + prep_func = project.get_function("prep-data") + assert prep_func.metadata.labels == {"tst1": "123"} # = p2 + + # test that a serving function was set with a graph element (model) + srv_func = project.get_function("serving") + assert srv_func.spec.graph["x"].class_name == "MyCls", "serving graph was not set" + + # test that the project metadata was set correctly + assert project.name == "projset" + assert project.spec.context == project_path + + # test that the params contain all params from the yaml, the load, and the setup script + assert project.spec.params == {"p1": "xyz", "p2": "123", "test123": "456"} + + @pytest.mark.parametrize( "sync,expected_num_of_funcs, save", [ @@ -368,7 +399,7 @@ def test_load_project_and_sync_functions( assert len(project.spec._function_objects) == expected_num_of_funcs if sync: - function_names = project.get_function_names() + function_names = project.spec._function_definitions.keys() assert len(function_names) == 
expected_num_of_funcs for func in function_names: fn = project.get_function(func) @@ -862,6 +893,62 @@ def test_run_function_passes_project_artifact_path(rundb_mock): assert run6.spec.output_path == proj1.spec.artifact_path +@pytest.mark.parametrize( + "workflow_path,exception", + [ + ( + "./", + pytest.raises( + ValueError, + match=str( + re.escape( + "Invalid 'workflow_path': './'. Please provide a valid URL/path to a file." + ) + ), + ), + ), + ( + "https://test", + pytest.raises( + ValueError, + match=str( + re.escape( + "Invalid 'workflow_path': 'https://test'. Please provide a valid URL/path to a file." + ) + ), + ), + ), + ( + "", + pytest.raises( + ValueError, + match=str( + re.escape( + "Invalid 'workflow_path': ''. Please provide a valid URL/path to a file." + ) + ), + ), + ), + ("https://test.py", does_not_raise()), + # relative path + ("./workflow.py", does_not_raise()), + # only file name + ("workflow.py", does_not_raise()), + # absolute path + ( + str(pathlib.Path(__file__).parent / "assets" / "handler.py"), + does_not_raise(), + ), + ], +) +def test_set_workflow_with_invalid_path( + chdir_to_test_location, workflow_path, exception +): + proj = mlrun.new_project("proj", save=False) + with exception: + proj.set_workflow("main", workflow_path) + + def test_project_ops(): # verify that project ops (run_function, ..) will use the right project (and not the pipeline_context) func_path = str(pathlib.Path(__file__).parent / "assets" / "handler.py") diff --git a/tests/run/test_run.py b/tests/run/test_run.py index 6a4c059359..fdef748d84 100644 --- a/tests/run/test_run.py +++ b/tests/run/test_run.py @@ -97,7 +97,7 @@ def test_invalid_name(): # name cannot have / in it new_function().run(name="asd/asd", handler=my_func) assert ( - "Field 'run.metadata.name' is malformed. Does not match required pattern" + "Field 'run.metadata.name' is malformed. 
asd/asd does not match required pattern" in str(excinfo.value) ) diff --git a/tests/rundb/test_httpdb.py b/tests/rundb/test_httpdb.py index 0d514080c3..0623a5c825 100644 --- a/tests/rundb/test_httpdb.py +++ b/tests/rundb/test_httpdb.py @@ -396,6 +396,13 @@ def test_list_functions(create_server): # Server or client version is unstable, assuming compatibility ("0.7.1", "0.0.0+unstable", True), ("0.0.0+unstable", "0.7.1", True), + # feature branch + ("0.7.1", "0.0.0+feature-branch", True), + ("0.7.1-rc1", "0.0.0+feature-branch", True), + ("0.7.1-rc1+feature-branch", "0.0.0+feature-branch", True), + ("0.7.1", "0.7.1+feature-branch", True), + ("0.7.1-rc1", "0.7.1+feature-branch", True), + ("0.7.1-rc1+feature-branch", "0.7.1+feature-branch", True), ], ) def test_version_compatibility_validation(server_version, client_version, compatible): diff --git a/tests/system/api/test_secrets.py b/tests/system/api/test_secrets.py index ff479c6790..e68982cfef 100644 --- a/tests/system/api/test_secrets.py +++ b/tests/system/api/test_secrets.py @@ -12,12 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import datetime import pathlib +import time +import typing +import uuid from http import HTTPStatus import deepdiff +import igz_mgmt import pytest +import mlrun.api.utils.events.iguazio import mlrun.common.schemas import mlrun.errors from tests.system.base import TestMLRunSystem @@ -27,6 +33,139 @@ class TestKubernetesProjectSecrets(TestMLRunSystem): project_name = "db-system-test-project" + @pytest.mark.enterprise + def test_audit_project_secret_events(self): + secret_key = str(uuid.uuid4()) + secrets = {secret_key: "JustMySecret"} + + # ensure no project secrets + self._run_db.delete_project_secrets(self.project_name, provider="kubernetes") + + # create secret + now = datetime.datetime.utcnow() + self.project.set_secrets(secrets=secrets) + + self._ensure_audit_events( + mlrun.api.utils.events.iguazio.PROJECT_SECRET_CREATED, + now, + "secret_keys", + secret_key, + ) + + now = datetime.datetime.utcnow() + another_secret_key = str(uuid.uuid4()) + secrets.update({another_secret_key: "one"}) + self.project.set_secrets(secrets=secrets) + self._ensure_audit_events( + mlrun.api.utils.events.iguazio.PROJECT_SECRET_UPDATED, + now, + "secret_keys", + another_secret_key, + ) + + # delete secrets + now = datetime.datetime.utcnow() + self._run_db.delete_project_secrets(self.project_name, provider="kubernetes") + self._ensure_audit_events( + mlrun.api.utils.events.iguazio.PROJECT_SECRET_DELETED, + now, + "project_name", + self.project_name, + ) + + @pytest.mark.enterprise + def test_delete_project_secret_events(self): + """ + Test flow: + 1. Delete project secrets of project with no secrets - should not emit event + 2. Create 2 secrets - should emit created event + 3. Delete 1 secret - should emit update event + 4. Delete all secrets - should emit deleted event + 5. 
Delete project - should not emit secret deleted event + """ + secret_key1 = str(uuid.uuid4()) + secret_key2 = str(uuid.uuid4()) + secrets = { + secret_key1: "JustMySecret", + secret_key2: "MyOtherSecret", + } + + # ensure no project secrets + start = datetime.datetime.utcnow() + self._run_db.delete_project_secrets(self.project_name, provider="kubernetes") + time.sleep(1) + audit_events = igz_mgmt.AuditEvent.list( + self._igz_mgmt_client, + filter_by={ + "source": "mlrun-api", + "kind": mlrun.api.utils.events.iguazio.PROJECT_SECRET_DELETED, + "timestamp_iso8601": f"[$ge]{start.isoformat()}Z", + }, + ) + assert len(audit_events) == 0 + + now = datetime.datetime.utcnow() + self.project.set_secrets(secrets=secrets) + self._ensure_audit_events( + mlrun.api.utils.events.iguazio.PROJECT_SECRET_CREATED, + now, + "project_name", + self.project_name, + ) + + # delete 1 of the secrets + now = datetime.datetime.utcnow() + self._run_db.delete_project_secrets( + self.project_name, provider="kubernetes", secrets=[secret_key1] + ) + + # project secret should remain (updated) + self._ensure_audit_events( + mlrun.api.utils.events.iguazio.PROJECT_SECRET_UPDATED, + now, + "secret_keys", + secret_key1, + ) + + # delete all secrets + now = datetime.datetime.utcnow() + self._run_db.delete_project_secrets(self.project_name, provider="kubernetes") + self._ensure_audit_events( + mlrun.api.utils.events.iguazio.PROJECT_SECRET_DELETED, + now, + "project_name", + self.project_name, + ) + + # delete the secret-less project + now = datetime.datetime.utcnow() + self._run_db.delete_project( + self.project_name, mlrun.common.schemas.DeletionStrategy.cascade + ) + + # should not emit deleted event + time.sleep(1) + audit_events = igz_mgmt.AuditEvent.list( + self._igz_mgmt_client, + filter_by={ + "source": "mlrun-api", + "kind": mlrun.api.utils.events.iguazio.PROJECT_SECRET_DELETED, + "timestamp_iso8601": f"[$ge]{now.isoformat()}Z", + }, + ) + assert len(audit_events) == 0 + + # assert 1 deleted event 
from the start of the test + audit_events = igz_mgmt.AuditEvent.list( + self._igz_mgmt_client, + filter_by={ + "source": "mlrun-api", + "kind": mlrun.api.utils.events.iguazio.PROJECT_SECRET_DELETED, + "timestamp_iso8601": f"[$ge]{start.isoformat()}Z", + }, + ) + assert len(audit_events) == 1 + def test_k8s_project_secrets_using_api(self): secrets = {"secret1": "value1", "secret2": "value2"} data = {"provider": "kubernetes", "secrets": secrets} @@ -201,3 +340,52 @@ def test_k8s_project_secrets_with_runtime(self): # Cleanup secrets self._run_db.delete_project_secrets(self.project_name, provider="kubernetes") + + def _ensure_audit_events( + self, + event_kind: str, + since_time: datetime.datetime, + parameter_text_name: str, + parameter_text_value: str, + ): + actual_event = None + for event in self._get_audit_events(event_kind, since_time): + if not event.parameters_text: + continue + for parameter_text in event.parameters_text: + if ( + parameter_text.name == parameter_text_name + and parameter_text_value in parameter_text.value + ): + actual_event = event + break + assert actual_event is not None, "Failed to find the audit event" + + def _get_audit_events( + self, event_kind: str, since_time: datetime.datetime + ) -> typing.List[igz_mgmt.AuditEvent]: + def _get_audit_events(): + self._logger.info( + "Trying to get audit events", + event_kind=event_kind, + since_time=since_time.isoformat(), + ) + audit_events = igz_mgmt.AuditEvent.list( + self._igz_mgmt_client, + filter_by={ + "source": "mlrun-api", + "kind": event_kind, + "timestamp_iso8601": f"[$ge]{since_time.isoformat()}Z", + }, + ) + assert len(audit_events) > 0 + return audit_events + + # wait for 30 seconds for the audit events to be available + return mlrun.utils.retry_until_successful( + 3, + 10 * 3, + self._logger, + True, + _get_audit_events, + ) diff --git a/tests/system/base.py b/tests/system/base.py index 4cab1fbd6d..dacd9c29cd 100644 --- a/tests/system/base.py +++ b/tests/system/base.py @@ -17,6 +17,7 
@@ import sys import typing +import igz_mgmt import pytest import yaml from deepdiff import DeepDiff @@ -42,6 +43,7 @@ class TestMLRunSystem: "V3IO_FRAMESD", "V3IO_USERNAME", "V3IO_ACCESS_KEY", + "MLRUN_IGUAZIO_API_URL", "MLRUN_SYSTEM_TESTS_DEFAULT_SPARK_SERVICE", ] @@ -58,6 +60,13 @@ def setup_class(cls): cls._run_db = get_run_db() cls.custom_setup_class() cls._logger = logger.get_child(cls.__name__.lower()) + cls.project: typing.Optional[mlrun.projects.MlrunProject] = None + + if "MLRUN_IGUAZIO_API_URL" in env: + cls._igz_mgmt_client = igz_mgmt.Client( + endpoint=env["MLRUN_IGUAZIO_API_URL"], + access_key=env["V3IO_ACCESS_KEY"], + ) # the dbpath is already configured on the test startup before this stage # so even though we set the env var, we still need to directly configure diff --git a/tests/system/env-template.yml b/tests/system/env-template.yml index 20eb28d676..d9d949d5b4 100644 --- a/tests/system/env-template.yml +++ b/tests/system/env-template.yml @@ -21,6 +21,9 @@ MLRUN_DBPATH: # The webapi https_direct url - e.g. https://default-tenant.app.hedingber-28-1.iguazio-cd2.com:8444 V3IO_API: +# Iguazio API URL - e.g. https://dashboard.default-tenant.app.hedingber-28-1.iguazio-cd2.com +MLRUN_IGUAZIO_API_URL: + # The framesd url - e.g. 
https://framesd.default-tenant.app.hedingber-28-1.iguazio-cd2.com V3IO_FRAMESD: diff --git a/tests/system/feature_store/test_feature_store.py b/tests/system/feature_store/test_feature_store.py index 751f06d341..a4d6e0a4d2 100644 --- a/tests/system/feature_store/test_feature_store.py +++ b/tests/system/feature_store/test_feature_store.py @@ -893,7 +893,9 @@ def test_ingest_partitioned_by_key_and_time( assert result_columns.sort() == orig_columns.sort() @pytest.mark.parametrize("engine", ["storey", "pandas"]) - def test_passthrough_feature_set(self, engine): + @pytest.mark.parametrize("with_start_time", [True, False]) + @pytest.mark.parametrize("explicit_targets", [True, False]) + def test_passthrough_feature_set(self, engine, with_start_time, explicit_targets): name = f"measurements_set_{uuid.uuid4()}" key = "patient_id" measurements_set = fstore.FeatureSet( @@ -912,7 +914,10 @@ def test_passthrough_feature_set(self, engine): expected = source.to_dataframe().set_index("patient_id") # The file is sorted by time. 10 is just an arbitrary number. 
- start_time = expected["timestamp"][10] + if with_start_time: + start_time = expected["timestamp"][10] + else: + start_time = None if engine != "pandas": # pandas engine does not support preview (ML-2694) preview_pd = fstore.preview( @@ -923,11 +928,16 @@ def test_passthrough_feature_set(self, engine): preview_pd.set_index("patient_id", inplace=True) assert_frame_equal(expected, preview_pd, check_like=True, check_dtype=False) - fstore.ingest(measurements_set, source) + targets = [NoSqlTarget()] if explicit_targets else None + + fstore.ingest(measurements_set, source, targets=targets) - # assert that online target exist (nosql) and offline target does not (parquet) - assert len(measurements_set.status.targets) == 1 - assert isinstance(measurements_set.status.targets["nosql"], DataTarget) + if explicit_targets: + # assert that online target exist (nosql) and offline target does not (parquet) + assert len(measurements_set.status.targets) == 1 + assert isinstance(measurements_set.status.targets["nosql"], DataTarget) + else: + assert len(measurements_set.status.targets) == 0 # verify that get_offline (and preview) equals the source vector = fstore.FeatureVector("myvector", features=[f"{name}.*"]) @@ -937,31 +947,33 @@ def test_passthrough_feature_set(self, engine): get_offline_pd = resp.to_dataframe() # check time filter with passthrough - expected = expected[(expected["timestamp"] > start_time)] + if start_time: + expected = expected[(expected["timestamp"] > start_time)] assert_frame_equal(expected, get_offline_pd, check_like=True, check_dtype=False) - # assert get_online correctness - with fstore.get_online_feature_service(vector) as svc: - resp = svc.get([{"patient_id": "305-90-1613"}]) - assert resp == [ - { - "bad": 95, - "department": "01e9fe31-76de-45f0-9aed-0f94cc97bca0", - "room": 2, - "hr": 220.0, - "hr_is_error": False, - "rr": 25, - "rr_is_error": False, - "spo2": 99, - "spo2_is_error": False, - "movements": 4.614601941071927, - "movements_is_error": False, 
- "turn_count": 0.3582583538239813, - "turn_count_is_error": False, - "is_in_bed": 1, - "is_in_bed_is_error": False, - } - ] + if explicit_targets: + # assert get_online correctness + with fstore.get_online_feature_service(vector) as svc: + resp = svc.get([{"patient_id": "305-90-1613"}]) + assert resp == [ + { + "bad": 95, + "department": "01e9fe31-76de-45f0-9aed-0f94cc97bca0", + "room": 2, + "hr": 220.0, + "hr_is_error": False, + "rr": 25, + "rr_is_error": False, + "spo2": 99, + "spo2_is_error": False, + "movements": 4.614601941071927, + "movements_is_error": False, + "turn_count": 0.3582583538239813, + "turn_count_is_error": False, + "is_in_bed": 1, + "is_in_bed_is_error": False, + } + ] def test_ingest_twice_with_nulls(self): name = f"test_ingest_twice_with_nulls_{uuid.uuid4()}" @@ -1402,11 +1414,7 @@ def test_schedule_on_filtered_by_time(self, partitioned): ) assert path == data_set.get_target_path() - source = ParquetSource( - "myparquet", - path=path, - schedule="mock", - ) + source = ParquetSource("myparquet", path=path, schedule="mock") feature_set = fstore.FeatureSet( name=name, @@ -2149,10 +2157,18 @@ def get_v3io_api_host(): api = None if config.v3io_api: api = config.v3io_api + + # strip protocol if "//" in api: api = api[api.find("//") + 2 :] + + # strip port if ":" in api: api = api[: api.find(":")] + + # ensure webapi prefix + if not api.startswith("webapi."): + api = f"webapi.{api}" return api key = "patient_id" @@ -2170,7 +2186,7 @@ def get_v3io_api_host(): ), NoSqlTarget( name="fullpath", - path=f"v3io://webapi.{get_v3io_api_host()}/bigdata/system-test-project/nosql-purge-full", + path=f"v3io://{get_v3io_api_host()}/bigdata/system-test-project/nosql-purge-full", ), ] @@ -3531,7 +3547,7 @@ def test_relation_join(self, engine, with_indexes): "managers", entities=[managers_set_entity], ) - managers_set.set_targets(targets=["parquet"], with_defaults=False) + managers_set.set_targets() fstore.ingest(managers_set, managers) classes_set_entity = 
fstore.Entity("c_id") @@ -3539,7 +3555,7 @@ def test_relation_join(self, engine, with_indexes): "classes", entities=[classes_set_entity], ) - managers_set.set_targets(targets=["parquet"], with_defaults=False) + managers_set.set_targets() fstore.ingest(classes_set, classes) departments_set_entity = fstore.Entity("d_id") @@ -3548,7 +3564,7 @@ def test_relation_join(self, engine, with_indexes): entities=[departments_set_entity], relations={"m_id": managers_set_entity}, ) - departments_set.set_targets(targets=["parquet"], with_defaults=False) + departments_set.set_targets() fstore.ingest(departments_set, departments) employees_set_entity = fstore.Entity("id") @@ -3557,7 +3573,7 @@ def test_relation_join(self, engine, with_indexes): entities=[employees_set_entity], relations={"department_id": departments_set_entity}, ) - employees_set.set_targets(targets=["parquet"], with_defaults=False) + employees_set.set_targets() fstore.ingest(employees_set, employees_with_department) mini_employees_set = fstore.FeatureSet( @@ -3568,7 +3584,7 @@ def test_relation_join(self, engine, with_indexes): "class_id": classes_set_entity, }, ) - mini_employees_set.set_targets(targets=["parquet"], with_defaults=False) + mini_employees_set.set_targets() fstore.ingest(mini_employees_set, employees_with_class) features = ["employees.name"] @@ -3595,6 +3611,11 @@ def test_relation_join(self, engine, with_indexes): pd.DataFrame(employees_with_department, columns=["name"]), resp.to_dataframe(), ) + + with fstore.get_online_feature_service(vector) as svc: + resp = svc.get({"id": 100}) + assert resp[0] == {"name": "employee100"} + features = ["employees.name as n", "departments.name as n2"] vector = fstore.FeatureVector( @@ -3611,6 +3632,10 @@ def test_relation_join(self, engine, with_indexes): ) assert_frame_equal(join_employee_department, resp_1.to_dataframe()) + with fstore.get_online_feature_service(vector, entity_keys=["id"]) as svc: + resp = svc.get({"id": 100}) + assert resp[0] == {"n": 
"employee100", "n2": "dept1"} + features = [ "employees.name as n", "departments.name as n2", @@ -3631,6 +3656,14 @@ def test_relation_join(self, engine, with_indexes): ) assert_frame_equal(join_employee_managers, resp_2.to_dataframe()) + with fstore.get_online_feature_service(vector, entity_keys=["id"]) as svc: + resp = svc.get({"id": 100}) + assert resp[0] == { + "n": "employee100", + "n2": "dept1", + "man_name": "manager10", + } + features = ["employees.name as n", "mini-employees.name as mini_name"] vector = fstore.FeatureVector( @@ -3646,6 +3679,9 @@ def test_relation_join(self, engine, with_indexes): order_by="name", ) assert_frame_equal(join_employee_sets, resp_3.to_dataframe()) + with fstore.get_online_feature_service(vector, entity_keys=["id"]) as svc: + resp = svc.get({"id": 100}) + assert resp[0] == {"n": "employee100", "mini_name": "employee100"} features = [ "employees.name as n", @@ -3668,6 +3704,15 @@ def test_relation_join(self, engine, with_indexes): ) assert_frame_equal(join_all, resp_4.to_dataframe()) + with fstore.get_online_feature_service(vector, entity_keys=["id"]) as svc: + resp = svc.get({"id": 100}) + assert resp[0] == { + "n": "employee100", + "n2": "dept1", + "mini_name": "employee100", + "name_cls": "class20", + } + @pytest.mark.parametrize("with_indexes", [True, False]) @pytest.mark.parametrize("engine", ["local", "dask"]) def test_relation_join_multi_entities(self, engine, with_indexes): diff --git a/tests/system/feature_store/test_spark_engine.py b/tests/system/feature_store/test_spark_engine.py index fcbb029e87..aa2a2727ae 100644 --- a/tests/system/feature_store/test_spark_engine.py +++ b/tests/system/feature_store/test_spark_engine.py @@ -44,7 +44,6 @@ OneHotEncoder, ) from mlrun.features import Entity -from mlrun.model import DataTarget from tests.system.base import TestMLRunSystem from tests.system.feature_store.data_sample import stocks from tests.system.feature_store.expected_stats import expected_stats @@ -822,9 +821,7 @@ def 
test_aggregations(self): ] vector = fstore.FeatureVector("my-vec", features) - resp = fstore.get_offline_features( - vector, entity_timestamp_column="time", with_indexes=True - ) + resp = fstore.get_offline_features(vector, with_indexes=True) # We can't count on the order when reading the results back result_records = ( @@ -1399,21 +1396,19 @@ def test_get_offline_features_with_spark_engine(self, passthrough, target_type): passthrough=passthrough, ) source = ParquetSource("myparquet", path=self.get_pq_source_path()) - self.set_targets(measurements) + if not passthrough: + self.set_targets(measurements) fstore.ingest( measurements, source, spark_context=self.spark_service, run_config=fstore.RunConfig(local=self.run_local), ) - if not self.run_local: + if passthrough: + assert len(measurements.status.targets) == 0 + elif not self.run_local: assert measurements.status.targets[0].run_id is not None - # assert that online target exist (nosql) and offline target does not (parquet) - if passthrough and not self.run_local: - assert len(measurements.status.targets) == 1 - assert isinstance(measurements.status.targets["nosql"], DataTarget) - fv_name = "measurements-fv" features = [ "measurements.bad", diff --git a/tests/system/model_monitoring/assets/model.pkl b/tests/system/model_monitoring/assets/model.pkl index 2c2f805d36..d91d9a6fe3 100644 Binary files a/tests/system/model_monitoring/assets/model.pkl and b/tests/system/model_monitoring/assets/model.pkl differ diff --git a/tests/system/model_monitoring/test_model_monitoring.py b/tests/system/model_monitoring/test_model_monitoring.py index 52aa74a58a..b00062991e 100644 --- a/tests/system/model_monitoring/test_model_monitoring.py +++ b/tests/system/model_monitoring/test_model_monitoring.py @@ -239,7 +239,6 @@ def test_basic_model_monitoring(self): # 1 - a single model endpoint is created # 2 - stream metrics are recorded as expected under the model endpoint - simulation_time = 90 # 90 seconds # Deploy Model Servers project 
= mlrun.get_run_db().get_project(self.project_name) @@ -284,13 +283,13 @@ def test_basic_model_monitoring(self): # Simulating valid requests iris_data = iris["data"].tolist() - t_end = monotonic() + simulation_time - while monotonic() < t_end: + + for i in range(102): data_point = choice(iris_data) serving_fn.invoke( f"v2/models/{model_name}/infer", json.dumps({"inputs": [data_point]}) ) - sleep(uniform(0.2, 1.1)) + sleep(choice([0.01, 0.04])) # Test metrics endpoints_list = mlrun.get_run_db().list_model_endpoints( @@ -301,6 +300,8 @@ def test_basic_model_monitoring(self): endpoint = endpoints_list[0] assert len(endpoint.status.metrics) > 0 + assert endpoint.status.metrics["generic"]["predictions_count_5m"] == 101 + predictions_per_second = endpoint.status.metrics["real_time"][ "predictions_per_second" ] diff --git a/tests/system/projects/assets/pipeline_with_resource_param.py b/tests/system/projects/assets/pipeline_with_resource_param.py new file mode 100644 index 0000000000..3283969cb7 --- /dev/null +++ b/tests/system/projects/assets/pipeline_with_resource_param.py @@ -0,0 +1,32 @@ +# Copyright 2023 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import kfp + +import mlrun + + +@kfp.dsl.pipeline( + name="Demo passing param to function spec", description="Shows how to use mlrun." 
+) +def kfpipeline(memory: str = "10Mi"): + time_to_sleep = 2 + project: mlrun.projects.MlrunProject = mlrun.get_current_project() + func: mlrun.runtimes.KubejobRuntime = project.get_function("func-1") + func.with_requests(mem=str(memory)) + mlrun.run_function( + func, + params={"time_to_sleep": time_to_sleep}, + outputs=["return"], + ) diff --git a/tests/system/projects/assets/workflow.py b/tests/system/projects/assets/workflow.py index eb12126438..f7a5aab32c 100644 --- a/tests/system/projects/assets/workflow.py +++ b/tests/system/projects/assets/workflow.py @@ -16,6 +16,7 @@ def kfpipeline(): + # sleeping for 60 seconds to be able to abort the run in the middle of the execution time_to_sleep = 60 step_1 = mlrun.run_function( diff --git a/tests/system/projects/test_project.py b/tests/system/projects/test_project.py index 2cc565cd68..e77a8b457e 100644 --- a/tests/system/projects/test_project.py +++ b/tests/system/projects/test_project.py @@ -24,6 +24,7 @@ from kfp import dsl import mlrun +import mlrun.common.schemas import mlrun.utils.logger from mlrun.artifacts import Artifact from mlrun.model import EntrypointParam @@ -87,6 +88,7 @@ def assets_path(self): ) def _create_project(self, project_name, with_repo=False, overwrite=False): + self.custom_project_names_to_delete.append(project_name) proj = mlrun.new_project( project_name, str(self.assets_path), overwrite=overwrite ) @@ -166,8 +168,6 @@ def test_build_function_image_usability(self): def test_run(self): name = "pipe0" - self.custom_project_names_to_delete.append(name) - # create project in context self._create_project(name) # load project from context dir and run a workflow @@ -198,8 +198,6 @@ def test_run(self): def test_run_artifact_path(self): name = "pipe1" - self.custom_project_names_to_delete.append(name) - # create project in context self._create_project(name) # load project from context dir and run a workflow @@ -343,10 +341,9 @@ def test_cli_with_remote(self): def test_inline_pipeline(self): name = 
"pipe5" - self.custom_project_names_to_delete.append(name) project_dir = f"{projects_dir}/{name}" shutil.rmtree(project_dir, ignore_errors=True) - project = self._create_project(name, True) + project = self._create_project(name, with_repo=True) run = project.run( artifact_path=f"v3io:///projects/{name}/artifacts", workflow_handler=pipe_test, @@ -459,7 +456,6 @@ def test_overwrite_project_failure(self): def _test_new_pipeline(self, name, engine): project = self._create_project(name) - self.custom_project_names_to_delete.append(name) project.set_function( "gen_iris.py", "gen-iris", @@ -485,6 +481,62 @@ def test_local_pipeline(self): def test_kfp_pipeline(self): self._test_new_pipeline("kfppipe", engine="kfp") + def test_kfp_runs_getting_deleted_on_project_deletion(self): + project_name = "kfppipedelete" + self.custom_project_names_to_delete.append(project_name) + + project = self._create_project(project_name) + self._initialize_sleep_workflow(project) + project.run("main", engine="kfp") + + db = mlrun.get_run_db() + project_pipeline_runs = db.list_pipelines(project=project_name) + # expecting to have pipeline run + assert ( + project_pipeline_runs.runs + ), "no pipeline runs found for project, expected to have pipeline run" + # deleting project with deletion strategy cascade so it will delete any related resources ( pipelines as well ) + db.delete_project( + name=project_name, + deletion_strategy=mlrun.common.schemas.DeletionStrategy.cascade, + ) + # create the project again ( using new_project, instead of get_or_create_project so it won't create project + # from project.yaml in the context that might contain project.yaml + mlrun.new_project(project_name) + + project_pipeline_runs = db.list_pipelines(project=project_name) + assert ( + not project_pipeline_runs.runs + ), "pipeline runs found for project after deletion, expected to be empty" + + def test_kfp_pipeline_with_resource_param_passed(self): + project_name = "test-pipeline-with-resource-param" + 
self.custom_project_names_to_delete.append(project_name) + project = mlrun.new_project(project_name, context=str(self.assets_path)) + + code_path = str(self.assets_path / "sleep.py") + workflow_path = str(self.assets_path / "pipeline_with_resource_param.py") + + project.set_function( + name="func-1", + func=code_path, + kind="job", + image="mlrun/mlrun", + handler="handler", + ) + # set and run a two-step workflow in the project + project.set_workflow("paramflow", workflow_path) + + arguments = {"memory": "11Mi"} + pipeline_status = project.run( + "paramflow", engine="kfp", arguments=arguments, watch=True + ) + assert pipeline_status.workflow.args == arguments + + # get the function from the db + function = project.get_function("func-1", ignore_cache=True) + assert function.spec.resources["requests"]["memory"] == arguments["memory"] + def _test_remote_pipeline_from_github( self, name, workflow_name, engine=None, local=None, watch=False ): @@ -546,7 +598,6 @@ def test_non_existent_run_id_in_pipeline(self): def test_remote_from_archive(self): name = "pipe6" - self.custom_project_names_to_delete.append(name) project = self._create_project(name) archive_path = f"v3io:///projects/{project.name}/archive1.zip" project.export(archive_path) @@ -596,7 +647,6 @@ def test_kfp_from_local_code(self): def test_local_cli(self): # load project from git name = "lclclipipe" - self.custom_project_names_to_delete.append(name) project = self._create_project(name) project.set_function( "gen_iris.py", @@ -837,7 +887,6 @@ def test_failed_schedule_workflow_non_remote_source(self): name = "non-remote-fail" # Creating a local project project = self._create_project(name) - self.custom_project_names_to_delete.append(name) with pytest.raises(mlrun.errors.MLRunInvalidArgumentError): project.run("main", schedule="*/10 * * * *") @@ -926,29 +975,12 @@ def test_abort_step_in_workflow(self, pull_state_mode): # when pull_state mode is enabled it simulates the flow of wait_for_completion 
mlrun.mlconf.httpdb.logs.pipelines.pull_state.mode = pull_state_mode - code_path = str(self.assets_path / "sleep.py") - workflow_path = str(self.assets_path / "workflow.py") - - project.set_function( - name="func-1", - func=code_path, - kind="job", - image="mlrun/mlrun", - handler="handler", - ) - project.set_function( - name="func-2", - func=code_path, - kind="job", - image="mlrun/mlrun", - handler="handler", - ) - def _assert_workflow_status(workflow, status): assert workflow.state == status - # set and run a two-step workflow in the project - project.set_workflow("main", workflow_path) + self._initialize_sleep_workflow(project) + + # run a two-step workflow in the project workflow = project.run("main", engine="kfp") mlrun.utils.retry_until_successful( @@ -1052,3 +1084,22 @@ def test_project_build_config_export_import(self): assert run_result.output("score") shutil.rmtree(project_dir, ignore_errors=True) + + def _initialize_sleep_workflow(self, project: mlrun.projects.MlrunProject): + code_path = str(self.assets_path / "sleep.py") + workflow_path = str(self.assets_path / "workflow.py") + project.set_function( + name="func-1", + func=code_path, + kind="job", + image="mlrun/mlrun", + handler="handler", + ) + project.set_function( + name="func-2", + func=code_path, + kind="job", + image="mlrun/mlrun", + handler="handler", + ) + project.set_workflow("main", workflow_path) diff --git a/tests/system/pytest.ini b/tests/system/pytest.ini new file mode 100644 index 0000000000..33c5b3d3bd --- /dev/null +++ b/tests/system/pytest.ini @@ -0,0 +1,14 @@ +# Copyright 2023 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/tests/system/runtimes/test_notifications.py b/tests/system/runtimes/test_notifications.py index 00373ef09f..b84ae9ab53 100644 --- a/tests/system/runtimes/test_notifications.py +++ b/tests/system/runtimes/test_notifications.py @@ -195,7 +195,7 @@ def _create_notification( when=when or ["completed"], name=name or "test-notification", message=message or "test-notification-message", - condition=condition or "", + condition=condition, severity=severity or "info", params=params or {}, ) diff --git a/tests/test_cli.py b/tests/test_cli.py index 93726cee89..2ee66a5783 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -13,8 +13,12 @@ # limitations under the License. 
import pathlib +from sqlalchemy.orm import Session + import mlrun.projects from mlrun.__main__ import load_notification +from mlrun.api.db.base import DBInterface +from mlrun.artifacts.plots import ChartArtifact def test_add_notification_to_cli_from_file(): @@ -46,3 +50,41 @@ def test_add_notification_to_cli_from_dict(): project._notifiers._sync_notifications["ipython"].params.get("webhook") == "1234" ) + + +def test_cli_get_artifacts_with_uri(db: DBInterface, db_session: Session): + artifact_key = "artifact_test" + artifact_uid = "artifact_uid" + artifact_kind = ChartArtifact.kind + artifact = generate_artifact(artifact_key, kind=artifact_kind) + + db.store_artifact( + db_session, + artifact_key, + artifact, + artifact_uid, + ) + + artifacts = db.list_artifacts(db_session) + assert len(artifacts) == 1 + + # this is the function called when executing the get artifacts cli command + df = artifacts.to_df() + + # check that the uri is returned + assert "uri" in df + + +def generate_artifact(name, uid=None, kind=None): + artifact = { + "metadata": {"name": name}, + "spec": {"src_path": "/some/path"}, + "kind": kind, + "status": {"bla": "blabla"}, + } + if kind: + artifact["kind"] = kind + if uid: + artifact["metadata"]["uid"] = uid + + return artifact diff --git a/tests/rundb/test_rundb.py b/tests/test_render.py similarity index 52% rename from tests/rundb/test_rundb.py rename to tests/test_render.py index d9f23c84a7..407e89aee1 100644 --- a/tests/rundb/test_rundb.py +++ b/tests/test_render.py @@ -12,32 +12,58 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import pathlib + import pytest import mlrun +import mlrun.render from tests.conftest import results, rundb_path +assets_path = pathlib.Path(__file__).parent / "assets" +function_path = str(assets_path / "log_function.py") + def get_db(): return mlrun.get_run_db(rundb_path) -# -# pprint.pprint(db.list_runs()[:2]) - -# FIXME: this test was counting on the fact it's running after some test (I think test_httpdb) which leaves runs and -# artifacts in the `results` dir, it should generate its own stuff, skipping for now -@pytest.mark.skip("FIX_ME") -def test_list_runs(): +@pytest.mark.parametrize( + "generate_artifact_hash_mode, expected_target_paths", + [ + ( + False, + [ + f"{results}/log-function-log-dataset/0/feature_1.csv", + f"{results}/log-function-log-dataset/0/feature_2.csv", + ], + ), + ( + True, + [ + f"{results}/6154c46f1a6fffb0b6b716882279d7e09ecb6b8a.csv", + f"{results}/c88c2dc877a6595cb2eb834449aac6e2789d301c.csv", + ], + ), + ], +) +def test_list_runs(rundb_mock, generate_artifact_hash_mode, expected_target_paths): + mlrun.mlconf.artifacts.generate_target_path_from_artifact_hash = ( + generate_artifact_hash_mode + ) + func = mlrun.code_to_function( + filename=function_path, kind="job", handler="log_dataset" + ) + func.run(local=True, out_path=str(results)) db = get_db() runs = db.list_runs() assert runs, "empty runs result" html = runs.show(display=False) - - with open(f"{results}/runs.html", "w") as fp: - fp.write(html) + for expected_target_path in expected_target_paths: + expected_link, _ = mlrun.render.link_to_ipython(expected_target_path) + assert expected_link in html # FIXME: this test was counting on the fact it's running after some test (I think test_httpdb) which leaves runs and diff --git a/tests/test_requirements.py b/tests/test_requirements.py index becd881bd5..f4ed68669a 100644 --- a/tests/test_requirements.py +++ b/tests/test_requirements.py @@ -94,7 +94,7 @@ def test_requirement_specifiers_convention(): # See comment near requirement for 
why we're limiting to patch changes only for all of these "kfp": {"~=1.8.0, <1.8.14"}, "aiobotocore": {"~=2.4.2"}, - "storey": {"~=1.4.3"}, + "storey": {"~=1.4.4"}, "bokeh": {"~=2.4, >=2.4.2"}, "typing-extensions": {">=3.10.0,<5"}, "sphinx": {"~=4.3.0"}, @@ -131,6 +131,7 @@ def test_requirement_specifiers_convention(): "importlib_metadata": {">=3.6"}, "gitpython": {"~=3.1, >= 3.1.30"}, "orjson": {"~=3.3, <3.8.12"}, + "pydantic": {"~=1.10, >=1.10.8"}, "pyopenssl": {">=23"}, "google-cloud-bigquery": {"[pandas, bqstorage]~=3.2"}, # plotly artifact body in 5.12.0 may contain chars that are not encodable in 'latin-1' encoding diff --git a/tests/utils/test_helpers.py b/tests/utils/test_helpers.py index 12a1d29703..b1e405f7f1 100644 --- a/tests/utils/test_helpers.py +++ b/tests/utils/test_helpers.py @@ -94,6 +94,19 @@ def test_run_name_regex(value, expected): verify_field_regex("test_field", value, mlrun.utils.regex.run_name) +@pytest.mark.parametrize( + "value, expected", + [ + ("{{pipelineparam:op=;name=mem}}", does_not_raise()), + ("{{pipelineparam:op=2;name=mem}}", does_not_raise()), + ("{{pipelineparam:op=10Mb;name=mem}}", does_not_raise()), + ], +) +def test_pipeline_param(value, expected): + with expected: + verify_field_regex("test_field", value, mlrun.utils.regex.pipeline_param) + + @pytest.mark.parametrize( "value,expected", [ diff --git a/tests/utils/test_notifications.py b/tests/utils/test_notifications.py index b7ff988916..b1cb964e19 100644 --- a/tests/utils/test_notifications.py +++ b/tests/utils/test_notifications.py @@ -14,6 +14,8 @@ import asyncio import builtins +import copy +import json import unittest.mock from contextlib import nullcontext as does_not_raise @@ -395,6 +397,54 @@ def _store_project_secrets(*args, **kwargs): ) +def test_notification_params_unmasking_on_run(monkeypatch): + + secret_value = {"sensitive": "sensitive-value"} + run = { + "metadata": {"uid": "test-run-uid", "project": "test-project"}, + "spec": { + "notifications": [ + { + 
"name": "test-notification", + "when": ["completed"], + "params": {"secret": "secret-name"}, + }, + ], + }, + } + + def _get_valid_project_secret(*args, **kwargs): + return json.dumps(secret_value) + + def _get_invalid_project_secret(*args, **kwargs): + return json.dumps(secret_value)[:5] + + db_mock = unittest.mock.Mock() + db_session_mock = unittest.mock.Mock() + + monkeypatch.setattr( + mlrun.api.crud.Secrets, "get_project_secret", _get_valid_project_secret + ) + + unmasked_run = mlrun.api.api.utils.unmask_notification_params_secret_on_task( + db_mock, db_session_mock, copy.deepcopy(run) + ) + assert "sensitive" in unmasked_run.spec.notifications[0].params + assert "secret" not in unmasked_run.spec.notifications[0].params + assert unmasked_run.spec.notifications[0].params == secret_value + + monkeypatch.setattr( + mlrun.api.crud.Secrets, "get_project_secret", _get_invalid_project_secret + ) + unmasked_run = mlrun.api.api.utils.unmask_notification_params_secret_on_task( + db_mock, db_session_mock, copy.deepcopy(run) + ) + assert len(unmasked_run.spec.notifications) == 0 + db_mock.store_run_notifications.assert_called_once() + args, _ = db_mock.store_run_notifications.call_args + assert args[1][0].status == mlrun.common.schemas.NotificationStatus.ERROR + + NOTIFICATION_VALIDATION_PARMETRIZE = [ ( { @@ -432,6 +482,42 @@ def _store_project_secrets(*args, **kwargs): }, does_not_raise(), ), + ( + { + "when": "invalid-when", + }, + pytest.raises(mlrun.errors.MLRunInvalidArgumentError), + ), + ( + { + "when": ["completed", "error"], + }, + does_not_raise(), + ), + ( + { + "message": {"my-message": "invalid"}, + }, + pytest.raises(mlrun.errors.MLRunInvalidArgumentError), + ), + ( + { + "message": "completed", + }, + does_not_raise(), + ), + ( + { + "condition": ["invalid-condition"], + }, + pytest.raises(mlrun.errors.MLRunInvalidArgumentError), + ), + ( + { + "condition": "valid-condition", + }, + does_not_raise(), + ), ] @@ -446,6 +532,24 @@ def 
test_notification_validation_on_object( mlrun.model.Notification(**notification_kwargs) +def test_notification_validation_defaults(monkeypatch): + notification = mlrun.model.Notification() + notification_fields = { + "kind": mlrun.common.schemas.notification.NotificationKind.slack, + "message": "", + "severity": mlrun.common.schemas.notification.NotificationSeverity.INFO, + "when": ["completed"], + "condition": "", + "name": "", + } + + for field, expected_value in notification_fields.items(): + value = getattr(notification, field) + assert ( + value == expected_value + ), f"{field} field value is {value}, expected {expected_value}" + + @pytest.mark.parametrize( "notification_kwargs,expectation", NOTIFICATION_VALIDATION_PARMETRIZE,