diff --git a/.github/workflows/basic-ci-amd64.yaml b/.github/workflows/basic-ci-amd64.yaml index 119891853098..714f27f7f0bc 100644 --- a/.github/workflows/basic-ci-amd64.yaml +++ b/.github/workflows/basic-ci-amd64.yaml @@ -22,7 +22,7 @@ jobs: fail-fast: false matrix: containerd_version: ['lts', 'active'] - vmm: ['clh', 'qemu'] + vmm: ['clh', 'qemu', 'stratovirt'] runs-on: garm-ubuntu-2204-smaller env: CONTAINERD_VERSION: ${{ matrix.containerd_version }} @@ -60,7 +60,7 @@ jobs: fail-fast: false matrix: containerd_version: ['lts', 'active'] - vmm: ['clh', 'qemu'] + vmm: ['clh', 'qemu', 'stratovirt'] runs-on: garm-ubuntu-2204-smaller env: CONTAINERD_VERSION: ${{ matrix.containerd_version }} @@ -101,7 +101,7 @@ jobs: fail-fast: false matrix: containerd_version: ['lts', 'active'] - vmm: ['clh', 'qemu', 'dragonball'] + vmm: ['clh', 'qemu', 'dragonball', 'stratovirt'] runs-on: garm-ubuntu-2204-smaller env: CONTAINERD_VERSION: ${{ matrix.containerd_version }} diff --git a/.github/workflows/build-kata-static-tarball-amd64.yaml b/.github/workflows/build-kata-static-tarball-amd64.yaml index faec28373c20..d418b1e93d4c 100644 --- a/.github/workflows/build-kata-static-tarball-amd64.yaml +++ b/.github/workflows/build-kata-static-tarball-amd64.yaml @@ -48,6 +48,7 @@ jobs: - qemu - qemu-snp-experimental - qemu-tdx-experimental + - stratovirt - rootfs-image - rootfs-image-tdx - rootfs-initrd @@ -107,7 +108,7 @@ jobs: with: name: kata-artifacts-amd64${{ inputs.tarball-suffix }} path: kata-build/kata-static-${{ matrix.asset }}.tar.xz - retention-days: 1 + retention-days: 15 if-no-files-found: error create-kata-tarball: @@ -136,5 +137,5 @@ jobs: with: name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} path: kata-static.tar.xz - retention-days: 1 + retention-days: 15 if-no-files-found: error diff --git a/.github/workflows/build-kata-static-tarball-arm64.yaml b/.github/workflows/build-kata-static-tarball-arm64.yaml index 89e019e6f7db..76e972a565b7 100644 --- 
a/.github/workflows/build-kata-static-tarball-arm64.yaml +++ b/.github/workflows/build-kata-static-tarball-arm64.yaml @@ -33,6 +33,7 @@ jobs: - kernel-dragonball-experimental - nydus - qemu + - stratovirt - rootfs-image - rootfs-initrd - shim-v2 @@ -83,7 +84,7 @@ jobs: with: name: kata-artifacts-arm64${{ inputs.tarball-suffix }} path: kata-build/kata-static-${{ matrix.asset }}.tar.xz - retention-days: 1 + retention-days: 15 if-no-files-found: error create-kata-tarball: @@ -116,5 +117,5 @@ jobs: with: name: kata-static-tarball-arm64${{ inputs.tarball-suffix }} path: kata-static.tar.xz - retention-days: 1 + retention-days: 15 if-no-files-found: error diff --git a/.github/workflows/build-kata-static-tarball-ppc64le.yaml b/.github/workflows/build-kata-static-tarball-ppc64le.yaml new file mode 100644 index 000000000000..f413114e704f --- /dev/null +++ b/.github/workflows/build-kata-static-tarball-ppc64le.yaml @@ -0,0 +1,116 @@ +name: CI | Build kata-static tarball for ppc64le +on: + workflow_call: + inputs: + stage: + required: false + type: string + default: test + tarball-suffix: + required: false + type: string + push-to-registry: + required: false + type: string + default: no + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + build-asset: + runs-on: ppc + strategy: + matrix: + asset: + - kernel + - qemu + - rootfs-initrd + - shim-v2 + - virtiofsd + stage: + - ${{ inputs.stage }} + steps: + - name: Adjust a permission for repo + run: | + sudo chown -R $USER:$USER $GITHUB_WORKSPACE + + - name: Login to Kata Containers quay.io + if: ${{ inputs.push-to-registry == 'yes' }} + uses: docker/login-action@v2 + with: + registry: quay.io + username: ${{ secrets.QUAY_DEPLOYER_USERNAME }} + password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 # This is needed in order to keep the commit ids history + + # - name: Rebase atop 
of the latest target branch + # run: | + # ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + # env: + # TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Build ${{ matrix.asset }} + run: | + make "${KATA_ASSET}-tarball" + build_dir=$(readlink -f build) + # store-artifact does not work with symlink + sudo cp -r "${build_dir}" "kata-build" + sudo chown -R $(id -u):$(id -g) "kata-build" + env: + KATA_ASSET: ${{ matrix.asset }} + TAR_OUTPUT: ${{ matrix.asset }}.tar.gz + PUSH_TO_REGISTRY: ${{ inputs.push-to-registry }} + ARTEFACT_REGISTRY: ghcr.io + ARTEFACT_REGISTRY_USERNAME: ${{ github.actor }} + ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }} + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: store-artifact ${{ matrix.asset }} + uses: actions/upload-artifact@v3 + with: + name: kata-artifacts-ppc64le${{ inputs.tarball-suffix }} + path: kata-build/kata-static-${{ matrix.asset }}.tar.xz + retention-days: 15 + if-no-files-found: error + + create-kata-tarball: + runs-on: ubuntu-latest + needs: build-asset + steps: + - name: Adjust a permission for repo + run: | + sudo chown -R $USER:$USER $GITHUB_WORKSPACE + + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + # - name: Rebase atop of the latest target branch + # run: | + # ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + # env: + # TARGET_BRANCH: ${{ inputs.target-branch }} + - name: get-artifacts + uses: actions/download-artifact@v3 + with: + name: kata-artifacts-ppc64le${{ inputs.tarball-suffix }} + path: kata-artifacts + - name: merge-artifacts + run: | + ./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts versions.yaml + - name: store-artifacts + uses: actions/upload-artifact@v3 + with: + name: kata-static-tarball-ppc64le${{ inputs.tarball-suffix }} + path: kata-static.tar.xz + retention-days: 15 + if-no-files-found: error diff --git a/.github/workflows/build-kata-static-tarball-s390x.yaml 
b/.github/workflows/build-kata-static-tarball-s390x.yaml index 9997678e75de..f37056cc6894 100644 --- a/.github/workflows/build-kata-static-tarball-s390x.yaml +++ b/.github/workflows/build-kata-static-tarball-s390x.yaml @@ -80,7 +80,7 @@ jobs: with: name: kata-artifacts-s390x${{ inputs.tarball-suffix }} path: kata-build/kata-static-${{ matrix.asset }}.tar.xz - retention-days: 1 + retention-days: 15 if-no-files-found: error create-kata-tarball: @@ -113,5 +113,5 @@ jobs: with: name: kata-static-tarball-s390x${{ inputs.tarball-suffix }} path: kata-static.tar.xz - retention-days: 1 + retention-days: 15 if-no-files-found: error diff --git a/.github/workflows/ci-nightly-s390x.yaml b/.github/workflows/ci-nightly-s390x.yaml new file mode 100644 index 000000000000..9b2379fa48d0 --- /dev/null +++ b/.github/workflows/ci-nightly-s390x.yaml @@ -0,0 +1,47 @@ +on: + schedule: + - cron: '0 5 * * *' + +name: Nightly CI for s390x +jobs: + check-internal-test-result: + runs-on: s390x + strategy: + fail-fast: false + matrix: + test_title: + - kata-vfio-ap-e2e-tests + steps: + - name: Fetch a test result for ${{ matrix.test_title }} + run: | + file_name="${TEST_TITLE}-$(date +%Y-%m-%d).log" + /home/${USER}/script/handle_test_log.sh download $file_name + env: + TEST_TITLE: ${{ matrix.test_title }} + + k8s-cri-containerd-rhel9-e2e-tests: + runs-on: s390x-rhel9 + steps: + - name: Delete the existing files + run: | + sudo chown -R $USER:$USER $GITHUB_WORKSPACE + sudo rm -rf $GITHUB_WORKSPACE/* + + - name: Take a pre-action for self-hosted runner + run: | + ${HOME}/script/pre_action.sh rhel9-nightly + + - name: Run k8s/cri-containerd e2e tests on RHEL9 + run: | + export WORKSPACE=$GITHUB_WORKSPACE + export GITHUB_ACTION="" + bash ci_crio_entry_point.sh + env: + BAREMETAL: "true" + REPO_OWNER: "cri-o" + REPO_NAME: "cri-o" + + - name: Take a post-action for self-hosted runner + if: always() + run: | + ${HOME}/script/post_action.sh rhel9-nightly diff --git a/.github/workflows/ci.yaml 
b/.github/workflows/ci.yaml index 4203b4aa9fac..ca016a79cd4d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -36,6 +36,45 @@ jobs: target-branch: ${{ inputs.target-branch }} secrets: inherit + build-kata-static-tarball-s390x: + uses: ./.github/workflows/build-kata-static-tarball-s390x.yaml + with: + tarball-suffix: -${{ inputs.tag }} + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} + + build-kata-static-tarball-ppc64le: + uses: ./.github/workflows/build-kata-static-tarball-ppc64le.yaml + with: + tarball-suffix: -${{ inputs.tag }} + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} + + + publish-kata-deploy-payload-s390x: + needs: build-kata-static-tarball-s390x + uses: ./.github/workflows/publish-kata-deploy-payload-s390x.yaml + with: + tarball-suffix: -${{ inputs.tag }} + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-s390x + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} + secrets: inherit + + publish-kata-deploy-payload-ppc64le: + needs: build-kata-static-tarball-ppc64le + uses: ./.github/workflows/publish-kata-deploy-payload-ppc64le.yaml + with: + tarball-suffix: -${{ inputs.tag }} + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-ppc64le + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} + secrets: inherit + build-and-publish-tee-confidential-unencrypted-image: runs-on: ubuntu-latest steps: @@ -152,6 +191,17 @@ jobs: pr-number: ${{ inputs.pr-number }} target-branch: ${{ inputs.target-branch }} + run-k8s-tests-on-zvsi: + needs: [publish-kata-deploy-payload-s390x, build-and-publish-tee-confidential-unencrypted-image] + uses: ./.github/workflows/run-k8s-tests-on-zvsi.yaml + with: + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-s390x + commit-hash: 
${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} + run-metrics-tests: needs: build-kata-static-tarball-amd64 uses: ./.github/workflows/run-metrics.yaml @@ -167,3 +217,19 @@ jobs: tarball-suffix: -${{ inputs.tag }} commit-hash: ${{ inputs.commit-hash }} target-branch: ${{ inputs.target-branch }} + + run-cri-containerd-tests-s390x: + needs: build-kata-static-tarball-s390x + uses: ./.github/workflows/run-cri-containerd-tests-s390x.yaml + with: + tarball-suffix: -${{ inputs.tag }} + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} + + run-cri-containerd-tests-ppc64le: + needs: build-kata-static-tarball-ppc64le + uses: ./.github/workflows/run-cri-containerd-tests-ppc64le.yaml + with: + tarball-suffix: -${{ inputs.tag }} + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} diff --git a/.github/workflows/payload-after-push.yaml b/.github/workflows/payload-after-push.yaml index c1cce2c739e6..6350de5dde9e 100644 --- a/.github/workflows/payload-after-push.yaml +++ b/.github/workflows/payload-after-push.yaml @@ -34,6 +34,21 @@ jobs: push-to-registry: yes target-branch: ${{ github.ref_name }} secrets: inherit + + build-assets-ppc64le: + uses: ./.github/workflows/build-kata-static-tarball-ppc64le.yaml + with: + commit-hash: ${{ github.sha }} + push-to-registry: yes + target-branch: ${{ github.ref_name }} + secrets: inherit + + run-cri-containerd-tests-ppc64le: + needs: build-assets-ppc64le + uses: ./.github/workflows/run-cri-containerd-tests-ppc64le.yaml + with: + commit-hash: ${{ github.sha }} + target-branch: ${{ github.ref_name }} publish-kata-deploy-payload-amd64: needs: build-assets-amd64 @@ -68,9 +83,20 @@ jobs: target-branch: ${{ github.ref_name }} secrets: inherit + 
publish-kata-deploy-payload-ppc64le: + needs: build-assets-ppc64le + uses: ./.github/workflows/publish-kata-deploy-payload-ppc64le.yaml + with: + commit-hash: ${{ github.sha }} + registry: quay.io + repo: kata-containers/kata-deploy-ci + tag: kata-containers-ppc64le + target-branch: ${{ github.ref_name }} + secrets: inherit + publish-manifest: runs-on: ubuntu-latest - needs: [publish-kata-deploy-payload-amd64, publish-kata-deploy-payload-arm64, publish-kata-deploy-payload-s390x] + needs: [publish-kata-deploy-payload-amd64, publish-kata-deploy-payload-arm64, publish-kata-deploy-payload-s390x, publish-kata-deploy-payload-ppc64le] steps: - name: Checkout repository uses: actions/checkout@v4 @@ -87,5 +113,6 @@ jobs: docker manifest create quay.io/kata-containers/kata-deploy-ci:kata-containers-latest \ --amend quay.io/kata-containers/kata-deploy-ci:kata-containers-amd64 \ --amend quay.io/kata-containers/kata-deploy-ci:kata-containers-arm64 \ - --amend quay.io/kata-containers/kata-deploy-ci:kata-containers-s390x + --amend quay.io/kata-containers/kata-deploy-ci:kata-containers-s390x \ + --amend quay.io/kata-containers/kata-deploy-ci:kata-containers-ppc64le docker manifest push quay.io/kata-containers/kata-deploy-ci:kata-containers-latest diff --git a/.github/workflows/publish-kata-deploy-payload-ppc64le.yaml b/.github/workflows/publish-kata-deploy-payload-ppc64le.yaml new file mode 100644 index 000000000000..555f70145c69 --- /dev/null +++ b/.github/workflows/publish-kata-deploy-payload-ppc64le.yaml @@ -0,0 +1,70 @@ +name: CI | Publish kata-deploy payload for ppc64le +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + kata-payload: + runs-on: ppc64le + steps: + - name: Adjust a permission for repo + 
run: | + sudo chown -R $USER:$USER $GITHUB_WORKSPACE + + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + # - name: Rebase atop of the latest target branch + # run: | + # ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + # env: + # TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-ppc64le${{ inputs.tarball-suffix }} + + - name: Login to Kata Containers quay.io + if: ${{ inputs.registry == 'quay.io' }} + uses: docker/login-action@v2 + with: + registry: quay.io + username: ${{ secrets.QUAY_DEPLOYER_USERNAME }} + password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + + - name: Login to Kata Containers ghcr.io + if: ${{ inputs.registry == 'ghcr.io' }} + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: build-and-push-kata-payload + id: build-and-push-kata-payload + run: | + ./tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh \ + $(pwd)/kata-static.tar.xz \ + ${{ inputs.registry }}/${{ inputs.repo }} ${{ inputs.tag }} diff --git a/.github/workflows/release-ppc64le.yaml b/.github/workflows/release-ppc64le.yaml new file mode 100644 index 000000000000..3081da9c3d9c --- /dev/null +++ b/.github/workflows/release-ppc64le.yaml @@ -0,0 +1,53 @@ +name: Publish Kata release artifacts for ppc64le +on: + workflow_call: + inputs: + target-arch: + required: true + type: string + +jobs: + build-kata-static-tarball-ppc64le: + uses: ./.github/workflows/build-kata-static-tarball-ppc64le.yaml + with: + stage: release + + kata-deploy: + needs: build-kata-static-tarball-ppc64le + runs-on: ppc + steps: + - name: Login to Kata Containers docker.io + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Login to Kata Containers quay.io + uses: 
docker/login-action@v2 + with: + registry: quay.io + username: ${{ secrets.QUAY_DEPLOYER_USERNAME }} + password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + + - uses: actions/checkout@v3 + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-ppc64le + + - name: build-and-push-kata-deploy-ci-ppc64le + id: build-and-push-kata-deploy-ci-ppc64le + run: | + # We need to do such trick here as the format of the $GITHUB_REF + # is "refs/tags/" + tag=$(echo $GITHUB_REF | cut -d/ -f3-) + tags=($tag) + tags+=($([[ "$tag" =~ "alpha"|"rc" ]] && echo "latest" || echo "stable")) + for tag in ${tags[@]}; do + ./tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh \ + $(pwd)/kata-static.tar.xz "docker.io/katadocker/kata-deploy" \ + "${tag}-${{ inputs.target-arch }}" true ${{ inputs.target-arch }} + ./tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh \ + $(pwd)/kata-static.tar.xz "quay.io/kata-containers/kata-deploy" \ + "${tag}-${{ inputs.target-arch }}" + done diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index b45cc6dadb1a..11e43fdd3cf3 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -26,10 +26,16 @@ jobs: with: target-arch: s390x secrets: inherit + + build-and-push-assets-ppc64le: + uses: ./.github/workflows/release-ppc64le.yaml + with: + target-arch: ppc64le + secrets: inherit publish-multi-arch-images: runs-on: ubuntu-latest - needs: [build-and-push-assets-amd64, build-and-push-assets-arm64, build-and-push-assets-s390x] + needs: [build-and-push-assets-amd64, build-and-push-assets-arm64, build-and-push-assets-s390x, build-and-push-assets-ppc64le] steps: - name: Checkout repository uses: actions/checkout@v4 @@ -58,12 +64,14 @@ jobs: docker manifest create quay.io/kata-containers/kata-deploy:${tag} \ --amend quay.io/kata-containers/kata-deploy:${tag}-amd64 \ --amend quay.io/kata-containers/kata-deploy:${tag}-arm64 \ - 
--amend quay.io/kata-containers/kata-deploy:${tag}-s390x \ + --amend quay.io/kata-containers/kata-deploy:${tag}-ppc64le docker manifest create docker.io/katadocker/kata-deploy:${tag} \ --amend docker.io/katadocker/kata-deploy:${tag}-amd64 \ --amend docker.io/katadocker/kata-deploy:${tag}-arm64 \ - --amend docker.io/katadocker/kata-deploy:${tag}-s390x + --amend docker.io/katadocker/kata-deploy:${tag}-s390x \ + --amend docker.io/katadocker/kata-deploy:${tag}-ppc64le docker manifest push quay.io/kata-containers/kata-deploy:${tag} docker manifest push docker.io/katadocker/kata-deploy:${tag} @@ -116,6 +124,20 @@ jobs: echo "uploading asset '${tarball}' for tag: ${tag}" GITHUB_TOKEN=${{ secrets.GIT_UPLOAD_TOKEN }} gh release upload "${tag}" "${tarball}" popd + + - name: download-artifacts-ppc64le + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-ppc64le + - name: push ppc64le static tarball to github + run: | + tag=$(echo $GITHUB_REF | cut -d/ -f3-) + tarball="kata-static-$tag-ppc64le.tar.xz" + mv kata-static.tar.xz "$GITHUB_WORKSPACE/${tarball}" + pushd $GITHUB_WORKSPACE + echo "uploading asset '${tarball}' for tag: ${tag}" + GITHUB_TOKEN=${{ secrets.GIT_UPLOAD_TOKEN }} gh release upload "${tag}" "${tarball}" + popd upload-versions-yaml: runs-on: ubuntu-latest diff --git a/.github/workflows/run-cri-containerd-tests-ppc64le.yaml b/.github/workflows/run-cri-containerd-tests-ppc64le.yaml new file mode 100644 index 000000000000..7802d5106184 --- /dev/null +++ b/.github/workflows/run-cri-containerd-tests-ppc64le.yaml @@ -0,0 +1,59 @@ +name: CI | Run cri-containerd tests on ppc64le +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-cri-containerd: + strategy: + # We can set this to true whenever we're 100% sure that + # the all the 
tests are not flaky, otherwise we'll fail + # all the tests due to a single flaky instance + fail-fast: false + matrix: + containerd_version: ['active'] + vmm: ['qemu'] + runs-on: ppc + env: + CONTAINERD_VERSION: ${{ matrix.containerd_version }} + GOPATH: ${{ github.workspace }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - name: Adjust a permission for repo + run: | + sudo chown -R $USER:$USER $GITHUB_WORKSPACE + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + # - name: Rebase atop of the latest target branch + # run: | + # ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + # env: + # TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/integration/cri-containerd/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-ppc64le${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/integration/cri-containerd/gha-run.sh install-kata kata-artifacts + + - name: Run cri-containerd tests + run: bash tests/integration/cri-containerd/gha-run.sh run diff --git a/.github/workflows/run-cri-containerd-tests-s390x.yaml b/.github/workflows/run-cri-containerd-tests-s390x.yaml new file mode 100644 index 000000000000..924772e2c76b --- /dev/null +++ b/.github/workflows/run-cri-containerd-tests-s390x.yaml @@ -0,0 +1,66 @@ +name: CI | Run cri-containerd tests +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-cri-containerd: + strategy: + # We can set this to true whenever we're 100% sure that + # the all the tests are not flaky, otherwise we'll fail + # all the tests due to a single flaky instance + fail-fast: false + matrix: + containerd_version: ['active'] + vmm: ['qemu'] + runs-on: s390x + env: + 
CONTAINERD_VERSION: ${{ matrix.containerd_version }} + GOPATH: ${{ github.workspace }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - name: Adjust a permission for repo + run: sudo chown -R $USER:$USER $GITHUB_WORKSPACE + + - name: Take a pre-action for self-hosted runner + run: ${HOME}/script/pre_action.sh ubuntu-2204 + + - uses: actions/checkout@v4 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/integration/cri-containerd/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-s390x${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/integration/cri-containerd/gha-run.sh install-kata kata-artifacts + + - name: Run cri-containerd tests + run: bash tests/integration/cri-containerd/gha-run.sh run + + - name: Take a post-action for self-hosted runner + if: always() + run: ${HOME}/script/post_action.sh ubuntu-2204 diff --git a/.github/workflows/run-k8s-tests-on-aks.yaml b/.github/workflows/run-k8s-tests-on-aks.yaml index 2fadc761ca8c..e27586979616 100644 --- a/.github/workflows/run-k8s-tests-on-aks.yaml +++ b/.github/workflows/run-k8s-tests-on-aks.yaml @@ -27,18 +27,23 @@ jobs: strategy: fail-fast: false matrix: + rust-runtime: + - false host_os: - ubuntu vmm: - clh - dragonball - qemu + - stratovirt instance-type: - small - normal include: - host_os: cbl-mariner vmm: clh + - dragonball: + rust-runtime: true runs-on: ubuntu-latest env: DOCKER_REGISTRY: ${{ inputs.registry }} @@ -50,6 +55,7 @@ jobs: KUBERNETES: "vanilla" USING_NFD: "false" K8S_TEST_HOST_TYPE: ${{ matrix.instance-type }} + RUST_RUNTIME: ${{ matrix.rust-runtime }} steps: - uses: actions/checkout@v4 with: diff --git 
a/.github/workflows/run-k8s-tests-on-zvsi.yaml b/.github/workflows/run-k8s-tests-on-zvsi.yaml new file mode 100644 index 000000000000..2c840c314800 --- /dev/null +++ b/.github/workflows/run-k8s-tests-on-zvsi.yaml @@ -0,0 +1,83 @@ +name: CI | Run kubernetes tests on IBM Cloud Z virtual server instance (zVSI) +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-k8s-tests: + strategy: + fail-fast: false + matrix: + vmm: + - qemu + snapshotter: + - devmapper + k8s: + - k3s + runs-on: s390x + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: "k3s" + SNAPSHOTTER: ${{ matrix.snapshotter }} + USING_NFD: "true" + TARGET_ARCH: "s390x" + steps: + - name: Adjust a permission for repo + run: sudo chown -R $USER:$USER $GITHUB_WORKSPACE + + - name: Take a pre-action for self-hosted runner + run: ${HOME}/script/pre_action.sh ubuntu-2204 + + - uses: actions/checkout@v4 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Deploy ${{ matrix.k8s }} + run: bash tests/integration/kubernetes/gha-run.sh deploy-k8s + + - name: Configure the ${{ matrix.snapshotter }} snapshotter + run: bash tests/integration/kubernetes/gha-run.sh configure-snapshotter + + - name: Deploy Kata + timeout-minutes: 10 + run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-zvsi + + - name: Run tests + timeout-minutes: 30 + run: bash tests/integration/kubernetes/gha-run.sh run-tests + + - name: 
Take a post-action + if: always() + run: | + bash tests/integration/kubernetes/gha-run.sh cleanup-zvsi || true + ${HOME}/script/post_action.sh ubuntu-2204 diff --git a/.github/workflows/run-metrics.yaml b/.github/workflows/run-metrics.yaml index 53deeb7864f2..7628a41369a5 100644 --- a/.github/workflows/run-metrics.yaml +++ b/.github/workflows/run-metrics.yaml @@ -48,7 +48,7 @@ jobs: # all the tests due to a single flaky instance. fail-fast: false matrix: - vmm: ['clh', 'qemu'] + vmm: ['clh', 'qemu', 'stratovirt'] max-parallel: 1 runs-on: metrics env: diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs index 276208bf3ff2..955588625ce7 100644 --- a/src/agent/src/rpc.rs +++ b/src/agent/src/rpc.rs @@ -1825,6 +1825,13 @@ fn do_copy_file(req: &CopyFileRequest) -> Result<()> { } if sflag.contains(stat::SFlag::S_IFLNK) { + // After kubernetes secret's volume update, the '..data' symlink should point to + // the new timestamped directory. + // TODO:The old and deleted timestamped dir still exists due to missing DELETE api in agent. + // Hence, unlink any existing symlink (even a dangling one) so symlinkat() does not hit EEXIST. + if path.is_symlink() { + unistd::unlink(&path)?; + } let src = PathBuf::from(OsStr::from_bytes(&req.data)); unistd::symlinkat(&src, None, &path)?; let path_str = CString::new(path.as_os_str().as_bytes())?; diff --git a/src/dragonball/src/dbs_boot/src/x86_64/mptable.rs b/src/dragonball/src/dbs_boot/src/x86_64/mptable.rs index 008e972a521c..6541ef293bae 100644 --- a/src/dragonball/src/dbs_boot/src/x86_64/mptable.rs +++ b/src/dragonball/src/dbs_boot/src/x86_64/mptable.rs @@ -9,6 +9,7 @@ //! MP Table configurations used for defining VM boot status. 
use libc::c_char; +use std::collections::HashMap; use std::io; use std::mem; use std::result; @@ -133,6 +134,7 @@ const MPC_OEM: [c_char; 8] = char_array!(c_char; 'A', 'L', 'I', 'C', 'L', 'O', ' const MPC_PRODUCT_ID: [c_char; 12] = char_array!(c_char; 'D', 'R', 'A', 'G', 'O', 'N', 'B', 'A', 'L', 'L', '1', '0'); const BUS_TYPE_ISA: [u8; 6] = char_array!(u8; 'I', 'S', 'A', ' ', ' ', ' '); +const BUS_TYPE_PCI: [u8; 6] = char_array!(u8; 'P', 'C', 'I', ' ', ' ', ' '); const IO_APIC_DEFAULT_PHYS_BASE: u32 = 0xfec0_0000; // source: linux/arch/x86/include/asm/apicdef.h const APIC_DEFAULT_PHYS_BASE: u32 = 0xfee0_0000; // source: linux/arch/x86/include/asm/apicdef.h @@ -144,6 +146,7 @@ const CPU_FEATURE_APIC: u32 = 0x200; const CPU_FEATURE_FPU: u32 = 0x001; const BUS_ID_ISA: u8 = 0; +const BUS_ID_PCI: u8 = 1; fn compute_checksum(v: &T) -> u8 { // Safe because we are only reading the bytes within the size of the `T` reference `v`. @@ -171,7 +174,12 @@ fn compute_mp_size(num_cpus: u8) -> usize { } /// Performs setup of the MP table for the given `num_cpus` -pub fn setup_mptable(mem: &M, boot_cpus: u8, max_cpus: u8) -> Result<()> { +pub fn setup_mptable( + mem: &M, + boot_cpus: u8, + max_cpus: u8, + pci_legacy_irqs: Option<&HashMap>, +) -> Result<()> { if boot_cpus > max_cpus { return Err(Error::TooManyBootCpus); } @@ -252,6 +260,18 @@ pub fn setup_mptable(mem: &M, boot_cpus: u8, max_cpus: u8) -> Re checksum = checksum.wrapping_add(compute_checksum(&mpc_bus.0)); } + { + let size = mem::size_of::() as u64; + let mut mpc_bus = MpcBusWrapper(mpspec::mpc_bus::default()); + mpc_bus.0.type_ = mpspec::MP_BUS as u8; + mpc_bus.0.busid = BUS_ID_PCI; + mpc_bus.0.bustype = BUS_TYPE_PCI; + mem.write_obj(mpc_bus, base_mp) + .map_err(|_| Error::WriteMpcBus)?; + base_mp = base_mp.unchecked_add(size); + checksum = checksum.wrapping_add(compute_checksum(&mpc_bus.0)); + } + { let size = mem::size_of::() as u64; let mut mpc_ioapic = MpcIoapicWrapper(mpspec::mpc_ioapic::default()); @@ -276,6 
+296,21 @@ pub fn setup_mptable(mem: &M, boot_cpus: u8, max_cpus: u8) -> Re mpc_intsrc.0.srcbusirq = i; mpc_intsrc.0.dstapic = ioapicid; mpc_intsrc.0.dstirq = i; + // Patch irq routing entry for mptable if it is registered + // as PCI legacy irq. + if let Some(irq_device) = pci_legacy_irqs { + if let Some(device_id) = irq_device.get(&i) { + mpc_intsrc.0.srcbus = BUS_ID_PCI; + mpc_intsrc.0.srcbusirq = device_id << 2; + } + } + // Keep it consistent with irq routing configuration in initialize_legacy(), + // IRQ0 is connected to Pin2 of the first IOAPIC and IRQ2 is unused. + if i == 0 { + mpc_intsrc.0.dstirq = 2; + } else if i == 2 { + continue; + } mem.write_obj(mpc_intsrc, base_mp) .map_err(|_| Error::WriteMpcIntsrc)?; base_mp = base_mp.unchecked_add(size); @@ -368,7 +403,7 @@ mod tests { )]) .unwrap(); - setup_mptable(&mem, num_cpus, num_cpus).unwrap(); + setup_mptable(&mem, num_cpus, num_cpus, None).unwrap(); } #[test] @@ -380,7 +415,7 @@ mod tests { )]) .unwrap(); - assert!(setup_mptable(&mem, num_cpus, num_cpus).is_err()); + assert!(setup_mptable(&mem, num_cpus, num_cpus, None).is_err()); } #[test] @@ -392,7 +427,7 @@ mod tests { )]) .unwrap(); - setup_mptable(&mem, num_cpus, num_cpus).unwrap(); + setup_mptable(&mem, num_cpus, num_cpus, None).unwrap(); let mpf_intel: MpfIntelWrapper = mem.read_obj(GuestAddress(MPTABLE_START)).unwrap(); @@ -411,7 +446,7 @@ mod tests { )]) .unwrap(); - setup_mptable(&mem, num_cpus, num_cpus).unwrap(); + setup_mptable(&mem, num_cpus, num_cpus, None).unwrap(); let mpf_intel: MpfIntelWrapper = mem.read_obj(GuestAddress(MPTABLE_START)).unwrap(); let mpc_offset = GuestAddress(u64::from(mpf_intel.0.physptr)); @@ -445,7 +480,7 @@ mod tests { .unwrap(); for i in 0..MAX_SUPPORTED_CPUS as u8 { - setup_mptable(&mem, i, i).unwrap(); + setup_mptable(&mem, i, i, None).unwrap(); let mpf_intel: MpfIntelWrapper = mem.read_obj(GuestAddress(MPTABLE_START)).unwrap(); let mpc_offset = GuestAddress(u64::from(mpf_intel.0.physptr)); @@ -481,7 +516,7 @@ 
mod tests { .unwrap(); for i in 0..MAX_SUPPORTED_CPUS as u8 { - setup_mptable(&mem, i, MAX_SUPPORTED_CPUS as u8).unwrap(); + setup_mptable(&mem, i, MAX_SUPPORTED_CPUS as u8, None).unwrap(); let mpf_intel: MpfIntelWrapper = mem.read_obj(GuestAddress(MPTABLE_START)).unwrap(); let mpc_offset = GuestAddress(u64::from(mpf_intel.0.physptr)); @@ -517,7 +552,34 @@ mod tests { )]) .unwrap(); - let result = setup_mptable(&mem, cpus as u8, cpus as u8).unwrap_err(); + let result = setup_mptable(&mem, cpus as u8, cpus as u8, None).unwrap_err(); assert_eq!(result, Error::TooManyCpus); } + + #[test] + fn irq_mptable_validation() { + let cpus = 1; + let mem = GuestMemoryMmap::<()>::from_ranges(&[( + GuestAddress(MPTABLE_START), + compute_mp_size(cpus as u8), + )]) + .unwrap(); + let mut pci_legacy_irqs = HashMap::new(); + pci_legacy_irqs.insert(0_u8, 2_u8); + setup_mptable(&mem, cpus as u8, cpus as u8, Some(&pci_legacy_irqs)).unwrap(); + let mpf_intel: MpfIntelWrapper = mem.read_obj(GuestAddress(MPTABLE_START)).unwrap(); + let mpc_offset = GuestAddress(u64::from(mpf_intel.0.physptr)); + let irq_offset = mpc_offset + .checked_add( + mem::size_of::() as u64 + + mem::size_of::() as u64 * cpus as u64 + + mem::size_of::() as u64 + + mem::size_of::() as u64 * 2, + ) + .unwrap(); + let mpc_int_table: MpcIntsrcWrapper = mem.read_obj(irq_offset).unwrap(); + assert_eq!(mpc_int_table.0.srcbusirq, 2 << 2); + assert_eq!(mpc_int_table.0.srcbus, BUS_ID_PCI); + assert_eq!(mpc_int_table.0.dstirq, 2); + } } diff --git a/src/dragonball/src/dbs_virtio_devices/Cargo.toml b/src/dragonball/src/dbs_virtio_devices/Cargo.toml index 9299915ad9b7..eb0912f306b8 100644 --- a/src/dragonball/src/dbs_virtio_devices/Cargo.toml +++ b/src/dragonball/src/dbs_virtio_devices/Cargo.toml @@ -53,3 +53,4 @@ virtio-mem = ["virtio-mmio"] virtio-balloon = ["virtio-mmio"] vhost = ["virtio-mmio", "vhost-rs/vhost-user-master", "vhost-rs/vhost-kern"] vhost-net = ["vhost", "vhost-rs/vhost-net"] +vhost-user = ["vhost"] \ No newline 
at end of file diff --git a/src/dragonball/src/dbs_virtio_devices/src/balloon.rs b/src/dragonball/src/dbs_virtio_devices/src/balloon.rs index 760000fbfbdc..1fe58aec514c 100644 --- a/src/dragonball/src/dbs_virtio_devices/src/balloon.rs +++ b/src/dragonball/src/dbs_virtio_devices/src/balloon.rs @@ -675,11 +675,14 @@ where error!("Failed to write config space"); return Err(ConfigError::InvalidOffset(offset)); }; - let Some(dst) = start.checked_add(data.len()) - .and_then(|end| config_slice.get_mut(start..end)) else - { + let Some(dst) = start + .checked_add(data.len()) + .and_then(|end| config_slice.get_mut(start..end)) + else { error!("Failed to write config space"); - return Err(ConfigError::InvalidOffsetPlusDataLen(offset + data.len() as u64)); + return Err(ConfigError::InvalidOffsetPlusDataLen( + offset + data.len() as u64, + )); }; dst.copy_from_slice(data); Ok(()) diff --git a/src/dragonball/src/dbs_virtio_devices/src/block/device.rs b/src/dragonball/src/dbs_virtio_devices/src/block/device.rs index 8caeef3b9921..c366a918a357 100644 --- a/src/dragonball/src/dbs_virtio_devices/src/block/device.rs +++ b/src/dragonball/src/dbs_virtio_devices/src/block/device.rs @@ -382,7 +382,7 @@ mod tests { use crate::epoll_helper::*; use crate::tests::{VirtQueue, VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; - use crate::{Error as VirtIoError, VirtioQueueConfig}; + use crate::{Error as VirtioError, VirtioQueueConfig}; use super::*; use crate::block::*; @@ -829,7 +829,7 @@ mod tests { .unwrap(); assert!(matches!( req.execute(&mut disk, m, &data_descs, &disk_id), - Err(ExecuteError::BadRequest(VirtIoError::InvalidOffset)) + Err(ExecuteError::BadRequest(VirtioError::InvalidOffset)) )); let mut file = DummyFile::new(); diff --git a/src/dragonball/src/dbs_virtio_devices/src/fs/device.rs b/src/dragonball/src/dbs_virtio_devices/src/fs/device.rs index d0156db7a7d3..932b5362e319 100644 --- a/src/dragonball/src/dbs_virtio_devices/src/fs/device.rs +++ 
b/src/dragonball/src/dbs_virtio_devices/src/fs/device.rs @@ -970,7 +970,7 @@ pub mod tests { use vm_memory::GuestMemoryRegion; use vm_memory::{GuestAddress, GuestMemoryMmap, GuestRegionMmap}; use vmm_sys_util::tempfile::TempFile; - use Error as VirtIoError; + use Error as VirtioError; use super::*; use crate::device::VirtioRegionHandler; @@ -996,7 +996,7 @@ pub mod tests { fn insert_region( &mut self, _region: Arc, - ) -> std::result::Result<(), VirtIoError> { + ) -> std::result::Result<(), VirtioError> { Ok(()) } } diff --git a/src/dragonball/src/dbs_virtio_devices/src/lib.rs b/src/dragonball/src/dbs_virtio_devices/src/lib.rs index 59727b090378..93c452544041 100644 --- a/src/dragonball/src/dbs_virtio_devices/src/lib.rs +++ b/src/dragonball/src/dbs_virtio_devices/src/lib.rs @@ -125,6 +125,32 @@ pub enum ActivateError { InvalidQueueConfig, #[error("IO: {0}.")] IOError(#[from] IOError), + #[error("Virtio error")] + VirtioError(Error), + #[error("Epoll manager error")] + EpollMgr(dbs_utils::epoll_manager::Error), + #[cfg(feature = "vhost")] + #[error("Vhost activate error")] + VhostActivate(vhost_rs::Error), +} + +impl std::convert::From for ActivateError { + fn from(error: Error) -> ActivateError { + ActivateError::VirtioError(error) + } +} + +impl std::convert::From for ActivateError { + fn from(error: dbs_utils::epoll_manager::Error) -> ActivateError { + ActivateError::EpollMgr(error) + } +} + +#[cfg(feature = "vhost")] +impl std::convert::From for ActivateError { + fn from(error: vhost_rs::Error) -> ActivateError { + ActivateError::VhostActivate(error) + } } /// Error code for VirtioDevice::read_config()/write_config(). @@ -155,6 +181,9 @@ pub enum Error { /// Guest gave us a descriptor that was too big to use. 
#[error("descriptor length too big.")] DescriptorLengthTooBig, + /// Error from the epoll event manager + #[error("dbs_utils error: {0:?}.")] + EpollMgr(dbs_utils::epoll_manager::Error), /// Guest gave us a write only descriptor that protocol says to read from. #[error("unexpected write only descriptor.")] UnexpectedWriteOnlyDescriptor, @@ -181,7 +210,7 @@ pub enum Error { VirtioQueueError(#[from] VqError), /// Error from Device activate. #[error("Device activate error: {0}")] - ActivateError(#[from] ActivateError), + ActivateError(#[from] Box), /// Error from Interrupt. #[error("Interrupt error: {0}")] InterruptError(IOError), @@ -229,6 +258,15 @@ pub enum Error { #[cfg(feature = "virtio-balloon")] #[error("Virtio-balloon error: {0}")] VirtioBalloonError(#[from] balloon::BalloonError), + + #[cfg(feature = "vhost")] + /// Error from the vhost subsystem + #[error("Vhost error: {0:?}")] + VhostError(vhost_rs::Error), + #[cfg(feature = "vhost")] + /// Error from the vhost user subsystem + #[error("Vhost-user error: {0:?}")] + VhostUserError(vhost_rs::vhost_user::Error), } // Error for tap devices diff --git a/src/dragonball/src/dbs_virtio_devices/src/mmio/mmio_state.rs b/src/dragonball/src/dbs_virtio_devices/src/mmio/mmio_state.rs index 434be51a915b..796024879a2d 100644 --- a/src/dragonball/src/dbs_virtio_devices/src/mmio/mmio_state.rs +++ b/src/dragonball/src/dbs_virtio_devices/src/mmio/mmio_state.rs @@ -124,7 +124,9 @@ where // If the driver incorrectly sets up the queues, the following check will fail and take // the device into an unusable state. 
if !self.check_queues_valid() { - return Err(Error::ActivateError(ActivateError::InvalidQueueConfig)); + return Err(Error::ActivateError(Box::new( + ActivateError::InvalidQueueConfig, + ))); } self.register_ioevent()?; @@ -138,7 +140,7 @@ where .map(|_| self.device_activated = true) .map_err(|e| { error!("device activate error: {:?}", e); - Error::ActivateError(e) + Error::ActivateError(Box::new(e)) }) } diff --git a/src/dragonball/src/dbs_virtio_devices/src/vhost/mod.rs b/src/dragonball/src/dbs_virtio_devices/src/vhost/mod.rs index 7c2940e69934..d60a281aa597 100644 --- a/src/dragonball/src/dbs_virtio_devices/src/vhost/mod.rs +++ b/src/dragonball/src/dbs_virtio_devices/src/vhost/mod.rs @@ -6,3 +6,21 @@ #[cfg(feature = "vhost-net")] pub mod vhost_kern; + +pub use vhost_rs::vhost_user::Error as VhostUserError; +pub use vhost_rs::Error as VhostError; + +#[cfg(feature = "vhost-user")] +pub mod vhost_user; + +impl std::convert::From for super::Error { + fn from(e: VhostError) -> Self { + super::Error::VhostError(e) + } +} + +impl std::convert::From for super::Error { + fn from(e: VhostUserError) -> Self { + super::Error::VhostUserError(e) + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/vhost/vhost_kern/net.rs b/src/dragonball/src/dbs_virtio_devices/src/vhost/vhost_kern/net.rs index 2b9a379de566..cd65474ece52 100644 --- a/src/dragonball/src/dbs_virtio_devices/src/vhost/vhost_kern/net.rs +++ b/src/dragonball/src/dbs_virtio_devices/src/vhost/vhost_kern/net.rs @@ -290,7 +290,7 @@ where "{}: Invalid virtio queue pairs, expected a value greater than 0, but got {}", NET_DRIVER_NAME, self.vq_pairs ); - return Err(VirtioError::ActivateError(ActivateError::InvalidParam)); + return Err(VirtioError::ActivateError(Box::new(ActivateError::InvalidParam))); } if self.handles.len() != self.vq_pairs || self.taps.len() != self.vq_pairs { @@ -299,7 +299,7 @@ where self.handles.len(), self.taps.len(), self.vq_pairs); - return 
Err(VirtioError::ActivateError(ActivateError::InternalError)); + return Err(VirtioError::ActivateError(Box::new(ActivateError::InternalError))); } for idx in 0..self.vq_pairs { diff --git a/src/dragonball/src/dbs_virtio_devices/src/vhost/vhost_user/connection.rs b/src/dragonball/src/dbs_virtio_devices/src/vhost/vhost_user/connection.rs new file mode 100644 index 000000000000..7eeeef1baf03 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/vhost/vhost_user/connection.rs @@ -0,0 +1,552 @@ +// Copyright (C) 2019-2023 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Helper utilities for vhost-user communication channel. + +use std::ops::Deref; +use std::os::unix::io::{AsRawFd, RawFd}; + +use dbs_utils::epoll_manager::{EventOps, EventSet, Events}; +use log::*; +use vhost_rs::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVringAddrFlags}; +use vhost_rs::vhost_user::{ + Error as VhostUserError, Listener as VhostUserListener, Master, VhostUserMaster, +}; +use vhost_rs::{VhostBackend, VhostUserMemoryRegionInfo, VringConfigData}; +use virtio_queue::QueueT; +use vm_memory::{ + Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryRegion, MemoryRegionAddress, +}; +use vmm_sys_util::eventfd::EventFd; + +use super::super::super::device::VirtioDeviceConfig; +use super::super::super::{Error as VirtioError, Result as VirtioResult}; +use super::VhostError; + +enum EndpointProtocolFlags { + ProtocolMq = 1, +} + +pub(super) struct Listener { + listener: VhostUserListener, + /// Slot to register epoll event for the underlying socket. + slot: u32, + name: String, + path: String, +} + +impl Listener { + pub fn new(name: String, path: String, force: bool, slot: u32) -> VirtioResult { + info!("vhost-user: create listener at {} for {}", path, name); + Ok(Listener { + listener: VhostUserListener::new(&path, force)?, + slot, + name, + path, + }) + } + + // Wait for an incoming connection until success. 
+ pub fn accept(&self) -> VirtioResult<(Master, u64)> { + loop { + match self.try_accept() { + Ok(Some((master, feature))) => return Ok((master, feature)), + Ok(None) => continue, + Err(e) => return Err(e), + } + } + } + + pub fn try_accept(&self) -> VirtioResult> { + let sock = match self.listener.accept() { + Ok(Some(conn)) => conn, + Ok(None) => return Ok(None), + Err(e) => return Err(e.into()), + }; + + let mut master = Master::from_stream(sock, 1); + info!("{}: try to get virtio features from slave.", self.name); + match Endpoint::initialize(&mut master) { + Ok(Some(features)) => Ok(Some((master, features))), + // The new connection has been closed, try again. + Ok(None) => { + warn!( + "{}: new connection get closed during initialization, waiting for another one.", + self.name + ); + Ok(None) + } + // Unrecoverable error happened + Err(e) => { + error!("{}: failed to get virtio features, {}", self.name, e); + Err(e) + } + } + } + + /// Register the underlying listener to be monitored for incoming connection. + pub fn register_epoll_event(&self, ops: &mut EventOps) -> VirtioResult<()> { + info!("{}: monitor incoming connect at {}", self.name, self.path); + // Switch to nonblocking mode. + self.listener.set_nonblocking(true)?; + let event = Events::with_data(&self.listener, self.slot, EventSet::IN); + ops.add(event).map_err(VirtioError::EpollMgr) + } +} + +/// Struct to pass info to vhost user backend +#[derive(Clone)] +pub struct BackendInfo { + /// -1 means to tell backend to destroy corresponding + /// device, while others means construct it + fd: i32, + /// cluster id of device, must set + cluster_id: u32, + /// device id of device, must set + device_id: u64, + /// device config file path + filename: [u8; 128], +} + +/// Struct to pass function parameters to methods of Endpoint. 
+pub(super) struct EndpointParam<'a, AS: GuestAddressSpace, Q: QueueT, R: GuestMemoryRegion> { + pub virtio_config: &'a VirtioDeviceConfig, + pub intr_evts: Vec<&'a EventFd>, + pub queue_sizes: &'a [u16], + pub features: u64, + pub protocol_flag: u16, + pub dev_protocol_features: VhostUserProtocolFeatures, + pub reconnect: bool, + pub backend: Option, + pub init_queues: u32, + pub slave_req_fd: Option, +} + +impl<'a, AS: GuestAddressSpace, Q: QueueT, R: GuestMemoryRegion> EndpointParam<'a, AS, Q, R> { + fn get_host_address(&self, addr: GuestAddress, mem: &AS::M) -> VirtioResult<*mut u8> { + mem.get_host_address(addr) + .map_err(|_| VirtioError::InvalidGuestAddress(addr)) + } + + /// set protocol multi-queue bit + pub fn set_protocol_mq(&mut self) { + self.protocol_flag |= EndpointProtocolFlags::ProtocolMq as u16; + } + + /// check if multi-queue bit is set + pub fn has_protocol_mq(&self) -> bool { + (self.protocol_flag & (EndpointProtocolFlags::ProtocolMq as u16)) != 0 + } +} + +/// Communication channel from the master to the slave. +/// +/// It encapsulates a low-level vhost-user master side communication endpoint, and provides +/// connection initialization, monitoring and reconnect functionalities for vhost-user devices. +/// +/// Caller needs to ensure mutual exclusive access to the object. +pub(super) struct Endpoint { + /// Underlying vhost-user communication endpoint. + conn: Option, + old: Option, + /// Token to register epoll event for the underlying socket. + slot: u32, + /// Identifier string for logs. + name: String, +} + +impl Endpoint { + pub fn new(master: Master, slot: u32, name: String) -> Self { + Endpoint { + conn: Some(master), + old: None, + slot, + name, + } + } + + /// First state of the connection negotiation between the master and the slave. + /// + /// If Ok(None) is returned, the underlying communication channel gets broken and the caller may + /// try to recreate the communication channel and negotiate again. 
+ /// + /// # Return + /// * - Ok(Some(avail_features)): virtio features from the slave + /// * - Ok(None): underlying communication channel gets broken during negotiation + /// * - Err(e): error conditions + fn initialize(master: &mut Master) -> VirtioResult> { + // 1. Seems that some vhost-user slaves depend on the get_features request to drive its + // internal state machine. + // N.B. it's really TDD, we just found it works in this way. Any spec about this? + let features = match master.get_features() { + Ok(val) => val, + Err(VhostError::VhostUserProtocol(VhostUserError::SocketBroken(_e))) => { + return Ok(None) + } + Err(e) => return Err(e.into()), + }; + + Ok(Some(features)) + } + + pub fn update_memory(&mut self, vm_as: &AS) -> VirtioResult<()> { + let master = match self.conn.as_mut() { + Some(conn) => conn, + None => { + error!("vhost user master is None!"); + return Err(VirtioError::InternalError); + } + }; + let guard = vm_as.memory(); + let mem = guard.deref(); + let mut regions = Vec::new(); + for region in mem.iter() { + let guest_phys_addr = region.start_addr(); + let file_offset = region.file_offset().ok_or_else(|| { + error!("region file_offset get error!"); + VirtioError::InvalidGuestAddress(guest_phys_addr) + })?; + let userspace_addr = region + .get_host_address(MemoryRegionAddress(0)) + .map_err(|e| { + error!("get_host_address error! {:?}", e); + VirtioError::InvalidGuestAddress(guest_phys_addr) + })?; + + regions.push(VhostUserMemoryRegionInfo { + guest_phys_addr: guest_phys_addr.raw_value() as u64, + memory_size: region.len() as u64, + userspace_addr: userspace_addr as *const u8 as u64, + mmap_offset: file_offset.start(), + mmap_handle: file_offset.file().as_raw_fd(), + }); + } + master.set_mem_table(&regions)?; + Ok(()) + } + + /// Drive the negotiation and initialization process with the vhost-user slave. 
+ pub fn negotiate( + &mut self, + config: &EndpointParam, + mut old: Option<&mut Master>, + ) -> VirtioResult<()> { + let guard = config.virtio_config.lock_guest_memory(); + let mem = guard.deref(); + let queue_num = config.virtio_config.queues.len(); + assert_eq!(queue_num, config.queue_sizes.len()); + assert_eq!(queue_num, config.intr_evts.len()); + + let master = match self.conn.as_mut() { + Some(conn) => conn, + None => return Err(VirtioError::InternalError), + }; + + info!("{}: negotiate()", self.name); + master.set_owner()?; + info!("{}: set_owner()", self.name); + + // 3. query features again after set owner. + let features = master.get_features()?; + info!("{}: get_features({:X})", self.name, features); + + // 4. set virtio features. + master.set_features(config.features)?; + info!("{}: set_features({:X})", self.name, config.features); + + // 5. set vhost-user protocol features + // typical protocol features: 0x37 + let mut protocol_features = master.get_protocol_features()?; + info!( + "{}: get_protocol_features({:X})", + self.name, protocol_features + ); + // There are two virtqueues for rx/tx. 
+ if config.has_protocol_mq() && !protocol_features.contains(VhostUserProtocolFeatures::MQ) { + return Err(VhostError::VhostUserProtocol(VhostUserError::FeatureMismatch).into()); + } + protocol_features &= config.dev_protocol_features; + master.set_protocol_features(protocol_features)?; + info!( + "{}: set_protocol_features({:X}), dev_protocol_features({:X})", + self.name, protocol_features, config.dev_protocol_features + ); + + // Setup slave channel if SLAVE_REQ protocol feature is set + if protocol_features.contains(VhostUserProtocolFeatures::SLAVE_REQ) { + match config.slave_req_fd { + Some(fd) => master.set_slave_request_fd(&fd)?, + None => { + error!( + "{}: Protocol feature SLAVE_REQ is set but not slave channel fd", + self.name + ); + return Err(VhostError::VhostUserProtocol(VhostUserError::InvalidParam).into()); + } + } + } else { + info!("{}: has no SLAVE_REQ protocol feature set", self.name); + } + + // 6. check number of queues supported + if config.has_protocol_mq() { + let queue_num = master.get_queue_num()?; + info!("{}: get_queue_num({:X})", self.name, queue_num); + if queue_num < config.queue_sizes.len() as u64 { + return Err(VhostError::VhostUserProtocol(VhostUserError::FeatureMismatch).into()); + } + } + + // 7. trigger the backend state machine. + for queue_index in 0..queue_num { + master.set_vring_call(queue_index, config.intr_evts[queue_index])?; + } + info!("{}: set_vring_call()", self.name); + + // 8. 
set mem_table + let mut regions = Vec::new(); + for region in mem.iter() { + let guest_phys_addr = region.start_addr(); + let file_offset = region + .file_offset() + .ok_or(VirtioError::InvalidGuestAddress(guest_phys_addr))?; + let userspace_addr = region + .get_host_address(MemoryRegionAddress(0)) + .map_err(|_| VirtioError::InvalidGuestAddress(guest_phys_addr))?; + + regions.push(VhostUserMemoryRegionInfo { + guest_phys_addr: guest_phys_addr.raw_value() as u64, + memory_size: region.len() as u64, + userspace_addr: userspace_addr as *const u8 as u64, + mmap_offset: file_offset.start(), + mmap_handle: file_offset.file().as_raw_fd(), + }); + } + master.set_mem_table(&regions)?; + info!("{}: set_mem_table()", self.name); + + // 9. setup vrings + for queue_cfg in config.virtio_config.queues.iter() { + master.set_vring_num(queue_cfg.index() as usize, queue_cfg.actual_size() as u16)?; + info!( + "{}: set_vring_num(idx: {}, size: {})", + self.name, + queue_cfg.index(), + queue_cfg.actual_size(), + ); + } + // On reconnection, the slave may have processed some packets in virtqueue and queue + // base is not zero any more. So don't set queue base on reconnection. + // N.B. it's really TDD, we just found it works in this way. Any spec about this? 
+ for queue_index in 0..queue_num { + let base = if old.is_some() { + let conn = old.as_mut().unwrap(); + match conn.get_vring_base(queue_index) { + Ok(val) => Some(val), + Err(_) => None, + } + } else if !config.reconnect { + Some(0) + } else { + None + }; + if let Some(val) = base { + master.set_vring_base(queue_index, val as u16)?; + info!( + "{}: set_vring_base(idx: {}, base: {})", + self.name, queue_index, val + ); + } + } + for queue_cfg in config.virtio_config.queues.iter() { + let queue = &queue_cfg.queue; + let queue_index = queue_cfg.index() as usize; + let desc_addr = + config.get_host_address(vm_memory::GuestAddress(queue.desc_table()), mem)?; + let used_addr = + config.get_host_address(vm_memory::GuestAddress(queue.used_ring()), mem)?; + let avail_addr = + config.get_host_address(vm_memory::GuestAddress(queue.avail_ring()), mem)?; + master.set_vring_addr( + queue_index, + &VringConfigData { + queue_max_size: queue.max_size(), + queue_size: queue_cfg.actual_size(), + flags: VhostUserVringAddrFlags::empty().bits(), + desc_table_addr: desc_addr as u64, + used_ring_addr: used_addr as u64, + avail_ring_addr: avail_addr as u64, + log_addr: None, + }, + )?; + info!( + "{}: set_vring_addr(idx: {}, addr: {:p})", + self.name, queue_index, desc_addr + ); + } + for queue_index in 0..queue_num { + master.set_vring_kick( + queue_index, + &config.virtio_config.queues[queue_index].eventfd, + )?; + info!( + "{}: set_vring_kick(idx: {}, fd: {})", + self.name, + queue_index, + config.virtio_config.queues[queue_index].eventfd.as_raw_fd() + ); + } + for queue_index in 0..queue_num { + let intr_index = if config.intr_evts.len() == 1 { + 0 + } else { + queue_index + }; + master.set_vring_call(queue_index, config.intr_evts[intr_index])?; + info!( + "{}: set_vring_call(idx: {}, fd: {})", + self.name, + queue_index, + config.intr_evts[intr_index].as_raw_fd() + ); + } + for queue_index in 0..queue_num { + master.set_vring_enable(queue_index, true)?; + info!( + "{}: 
set_vring_enable(idx: {}, enable: {})", + self.name, queue_index, true + ); + if (queue_index + 1) == config.init_queues as usize { + break; + } + } + info!("{}: protocol negotiate completed successfully.", self.name); + + Ok(()) + } + + pub fn set_queues_attach(&mut self, curr_queues: u32) -> VirtioResult<()> { + let master = match self.conn.as_mut() { + Some(conn) => conn, + None => return Err(VirtioError::InternalError), + }; + + for index in 0..curr_queues { + master.set_vring_enable(index as usize, true)?; + info!( + "{}: set_vring_enable(idx: {}, enable: {})", + self.name, index, true + ); + } + + Ok(()) + } + + /// Restore communication with the vhost-user slave on reconnect. + pub fn reconnect( + &mut self, + master: Master, + config: &EndpointParam, + ops: &mut EventOps, + ) -> VirtioResult<()> { + let mut old = self.conn.replace(master); + if let Err(e) = self.negotiate(config, old.as_mut()) { + error!("{}: failed to initialize connection: {}", self.name, e); + self.conn = old; + return Err(e); + } + if let Err(e) = self.register_epoll_event(ops) { + error!("{}: failed to add fd to epoll: {}", self.name, e); + self.conn = old; + return Err(e); + } + self.old = old; + Ok(()) + } + + /// Teardown the communication channel to the vhost-user slave. + pub fn disconnect(&mut self, ops: &mut EventOps) -> VirtioResult<()> { + info!("vhost-user-net: disconnect communication channel."); + match self.old.take() { + Some(master) => { + info!("close old connection"); + self.deregister_epoll_event(&master, ops) + } + None => match self.conn.take() { + Some(master) => { + info!("disconnect connection."); + self.deregister_epoll_event(&master, ops) + } + None => { + info!("get disconnect notification when it's already disconnected."); + Ok(()) + } + }, + } + } + + /// Register the underlying socket to be monitored for socket disconnect events. 
+ pub fn register_epoll_event(&self, ops: &mut EventOps) -> VirtioResult<()> { + match self.conn.as_ref() { + Some(master) => { + info!( + "{}: monitor disconnect event for fd {}.", + self.name, + master.as_raw_fd() + ); + ops.add(Events::with_data( + master, + self.slot, + EventSet::HANG_UP | EventSet::EDGE_TRIGGERED, + )) + .map_err(VirtioError::EpollMgr) + } + None => Err(VirtioError::InternalError), + } + } + + /// Deregister the underlying socket from the epoll controller. + pub fn deregister_epoll_event(&self, master: &Master, ops: &mut EventOps) -> VirtioResult<()> { + info!( + "{}: unregister epoll event for fd {}.", + self.name, + master.as_raw_fd() + ); + ops.remove(Events::with_data( + master, + self.slot, + EventSet::HANG_UP | EventSet::EDGE_TRIGGERED, + )) + .map_err(VirtioError::EpollMgr) + } + + pub fn set_master(&mut self, master: Master) { + self.conn = Some(master); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_endpoint_flags() { + assert_eq!(EndpointProtocolFlags::ProtocolMq as u16, 0x1); + } + + #[should_panic] + #[test] + fn test_connect_try_accept() { + let listener = Listener::new( + "test_listener".to_string(), + "/tmp/test_vhost_listener".to_string(), + true, + 1, + ) + .unwrap(); + + listener.listener.set_nonblocking(true).unwrap(); + + assert!(listener.try_accept().is_err()); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/vhost/vhost_user/mod.rs b/src/dragonball/src/dbs_virtio_devices/src/vhost/vhost_user/mod.rs new file mode 100644 index 000000000000..ad78fe763543 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/vhost/vhost_user/mod.rs @@ -0,0 +1,11 @@ +// Copyright (C) 2019-2023 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Vhost-based virtio device backend implementations. 
+ +use super::VhostError; + +pub mod connection; + +#[cfg(test)] +mod test_utils; diff --git a/src/dragonball/src/dbs_virtio_devices/src/vhost/vhost_user/test_utils.rs b/src/dragonball/src/dbs_virtio_devices/src/vhost/vhost_user/test_utils.rs new file mode 100644 index 000000000000..ac5fb9e1d7b2 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/vhost/vhost_user/test_utils.rs @@ -0,0 +1,750 @@ +// Copyright (C) 2021 Alibaba Cloud Computing. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause + +use std::fmt::Debug; +use std::marker::PhantomData; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::os::unix::net::UnixStream; +use std::{mem, slice}; + +use vmm_sys_util::tempfile::TempFile; +use libc::{c_void, iovec}; +use vhost_rs::vhost_user::message::{ + VhostUserHeaderFlag, VhostUserInflight, VhostUserMemory, VhostUserMemoryRegion, + VhostUserMsgValidator, VhostUserProtocolFeatures, VhostUserU64, VhostUserVirtioFeatures, + VhostUserVringAddr, VhostUserVringState, MAX_MSG_SIZE, +}; +use vhost_rs::vhost_user::Error; +use vmm_sys_util::sock_ctrl_msg::ScmSocket; + +pub const MAX_ATTACHED_FD_ENTRIES: usize = 32; + +pub(crate) trait Req: + Clone + Copy + Debug + PartialEq + Eq + PartialOrd + Ord + Into +{ + fn is_valid(&self) -> bool; +} + +pub type Result = std::result::Result; + +/// Type of requests sending from masters to slaves. +#[repr(u32)] +#[allow(unused, non_camel_case_types)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum MasterReq { + /// Null operation. + NOOP = 0, + /// Get from the underlying vhost implementation the features bit mask. + GET_FEATURES = 1, + /// Enable features in the underlying vhost implementation using a bit mask. + SET_FEATURES = 2, + /// Set the current Master as an owner of the session. + SET_OWNER = 3, + /// No longer used. + RESET_OWNER = 4, + /// Set the memory map regions on the slave so it can translate the vring addresses. 
+ SET_MEM_TABLE = 5, + /// Set logging shared memory space. + SET_LOG_BASE = 6, + /// Set the logging file descriptor, which is passed as ancillary data. + SET_LOG_FD = 7, + /// Set the size of the queue. + SET_VRING_NUM = 8, + /// Set the addresses of the different aspects of the vring. + SET_VRING_ADDR = 9, + /// Set the base offset in the available vring. + SET_VRING_BASE = 10, + /// Get the available vring base offset. + GET_VRING_BASE = 11, + /// Set the event file descriptor for adding buffers to the vring. + SET_VRING_KICK = 12, + /// Set the event file descriptor to signal when buffers are used. + SET_VRING_CALL = 13, + /// Set the event file descriptor to signal when error occurs. + SET_VRING_ERR = 14, + /// Get the protocol feature bit mask from the underlying vhost implementation. + GET_PROTOCOL_FEATURES = 15, + /// Enable protocol features in the underlying vhost implementation. + SET_PROTOCOL_FEATURES = 16, + /// Query how many queues the backend supports. + GET_QUEUE_NUM = 17, + /// Signal slave to enable or disable corresponding vring. + SET_VRING_ENABLE = 18, + /// Ask vhost user backend to broadcast a fake RARP to notify the migration is terminated + /// for guest that does not support GUEST_ANNOUNCE. + SEND_RARP = 19, + /// Set host MTU value exposed to the guest. + NET_SET_MTU = 20, + /// Set the socket file descriptor for slave initiated requests. + SET_SLAVE_REQ_FD = 21, + /// Send IOTLB messages with struct vhost_iotlb_msg as payload. + IOTLB_MSG = 22, + /// Set the endianness of a VQ for legacy devices. + SET_VRING_ENDIAN = 23, + /// Fetch the contents of the virtio device configuration space. + GET_CONFIG = 24, + /// Change the contents of the virtio device configuration space. + SET_CONFIG = 25, + /// Create a session for crypto operation. + CREATE_CRYPTO_SESSION = 26, + /// Close a session for crypto operation. + CLOSE_CRYPTO_SESSION = 27, + /// Advise slave that a migration with postcopy enabled is underway. 
+ POSTCOPY_ADVISE = 28, + /// Advise slave that a transition to postcopy mode has happened. + POSTCOPY_LISTEN = 29, + /// Advise that postcopy migration has now completed. + POSTCOPY_END = 30, + /// Get a shared buffer from slave. + GET_INFLIGHT_FD = 31, + /// Send the shared inflight buffer back to slave + SET_INFLIGHT_FD = 32, + /// Upper bound of valid commands. + MAX_CMD = 33, +} + +impl Into for MasterReq { + fn into(self) -> u32 { + self as u32 + } +} + +impl Req for MasterReq { + fn is_valid(&self) -> bool { + (*self > MasterReq::NOOP) && (*self < MasterReq::MAX_CMD) + } +} + +// Given a slice of sizes and the `skip_size`, return the offset of `skip_size` in the slice. +// For example: +// let iov_lens = vec![4, 4, 5]; +// let size = 6; +// assert_eq!(get_sub_iovs_offset(&iov_len, size), (1, 2)); +fn get_sub_iovs_offset(iov_lens: &[usize], skip_size: usize) -> (usize, usize) { + let mut size = skip_size; + let mut nr_skip = 0; + + for len in iov_lens { + if size >= *len { + size -= *len; + nr_skip += 1; + } else { + break; + } + } + (nr_skip, size) +} + +/// Common message header for vhost-user requests and replies. +/// A vhost-user message consists of 3 header fields and an optional payload. All numbers are in the +/// machine native byte order. 
+#[repr(packed)] +#[derive(Copy)] +pub(crate) struct VhostUserMsgHeader { + request: u32, + flags: u32, + size: u32, + _r: PhantomData, +} + +impl Debug for VhostUserMsgHeader { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Point") + .field("request", &{ self.request }) + .field("flags", &{ self.flags }) + .field("size", &{ self.size }) + .finish() + } +} + +impl VhostUserMsgValidator for VhostUserMsgHeader { + #[allow(clippy::if_same_then_else)] + fn is_valid(&self) -> bool { + if !self.get_code().is_valid() { + return false; + } else if self.size as usize > MAX_MSG_SIZE { + return false; + } else if self.get_version() != 0x1 { + return false; + } else if (self.flags & VhostUserHeaderFlag::RESERVED_BITS.bits()) != 0 { + return false; + } + true + } +} + +impl Clone for VhostUserMsgHeader { + fn clone(&self) -> VhostUserMsgHeader { + *self + } +} + +impl VhostUserMsgHeader { + /// Create a new instance of `VhostUserMsgHeader`. + pub fn new(request: R, flags: u32, size: u32) -> Self { + // Default to protocol version 1 + let fl = (flags & VhostUserHeaderFlag::ALL_FLAGS.bits()) | 0x1; + VhostUserMsgHeader { + request: request.into(), + flags: fl, + size, + _r: PhantomData, + } + } + + /// Get message type. + pub fn get_code(&self) -> R { + // It's safe because R is marked as repr(u32). + unsafe { std::mem::transmute_copy::(&{ self.request }) } + } + + /// Get message version number. + pub fn get_version(&self) -> u32 { + self.flags & 0x3 + } +} + +impl Default for VhostUserMsgHeader { + fn default() -> Self { + VhostUserMsgHeader { + request: 0, + flags: 0x1, + size: 0, + _r: PhantomData, + } + } +} + +/// Unix domain socket endpoint for vhost-user connection. +pub(crate) struct Endpoint { + sock: UnixStream, + _r: PhantomData, +} + +impl Endpoint { + /// Create a new stream by connecting to server at `str`. + /// + /// # Return: + /// * - the new Endpoint object on success. 
+ /// * - SocketConnect: failed to connect to peer. + pub fn connect(path: &str) -> Result { + let sock = UnixStream::connect(path).map_err(Error::SocketConnect)?; + Ok(Self::from_stream(sock)) + } + + /// Create an endpoint from a stream object. + pub fn from_stream(sock: UnixStream) -> Self { + Endpoint { + sock, + _r: PhantomData, + } + } + + /// Sends bytes from scatter-gather vectors over the socket with optional attached file + /// descriptors. + /// + /// # Return: + /// * - number of bytes sent on success + /// * - SocketRetry: temporary error caused by signals or short of resources. + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. + pub fn send_iovec(&mut self, iovs: &[&[u8]], fds: Option<&[RawFd]>) -> Result { + let rfds = match fds { + Some(rfds) => rfds, + _ => &[], + }; + self.sock.send_with_fds(iovs, rfds).map_err(Into::into) + } + + /// Sends all bytes from scatter-gather vectors over the socket with optional attached file + /// descriptors. Will loop until all data has been transfered. + /// + /// # Return: + /// * - number of bytes sent on success + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. 
+ pub fn send_iovec_all(&mut self, iovs: &[&[u8]], fds: Option<&[RawFd]>) -> Result { + let mut data_sent = 0; + let mut data_total = 0; + let iov_lens: Vec = iovs.iter().map(|iov| iov.len()).collect(); + for len in &iov_lens { + data_total += len; + } + + while (data_total - data_sent) > 0 { + let (nr_skip, offset) = get_sub_iovs_offset(&iov_lens, data_sent); + let iov = &iovs[nr_skip][offset..]; + + let data = &[&[iov], &iovs[(nr_skip + 1)..]].concat(); + let sfds = if data_sent == 0 { fds } else { None }; + + let sent = self.send_iovec(data, sfds); + match sent { + Ok(0) => return Ok(data_sent), + Ok(n) => data_sent += n, + Err(e) => match e { + Error::SocketRetry(_) => {} + _ => return Err(e), + }, + } + } + Ok(data_sent) + } + + /// Sends a header-only message with optional attached file descriptors. + /// + /// # Return: + /// * - number of bytes sent on success + /// * - SocketRetry: temporary error caused by signals or short of resources. + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. + /// * - PartialMessage: received a partial message. + pub fn send_header( + &mut self, + hdr: &VhostUserMsgHeader, + fds: Option<&[RawFd]>, + ) -> Result<()> { + // Safe because there can't be other mutable referance to hdr. + let iovs = unsafe { + [slice::from_raw_parts( + hdr as *const VhostUserMsgHeader as *const u8, + mem::size_of::>(), + )] + }; + let bytes = self.send_iovec_all(&iovs[..], fds)?; + if bytes != mem::size_of::>() { + return Err(Error::PartialMessage); + } + Ok(()) + } + + /// Send a message with header and body. Optional file descriptors may be attached to + /// the message. + /// + /// # Return: + /// * - number of bytes sent on success + /// * - SocketRetry: temporary error caused by signals or short of resources. + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. + /// * - PartialMessage: received a partial message. 
+ pub fn send_message( + &mut self, + hdr: &VhostUserMsgHeader, + body: &T, + fds: Option<&[RawFd]>, + ) -> Result<()> { + // Safe because there can't be other mutable referance to hdr and body. + let iovs = unsafe { + [ + slice::from_raw_parts( + hdr as *const VhostUserMsgHeader as *const u8, + mem::size_of::>(), + ), + slice::from_raw_parts(body as *const T as *const u8, mem::size_of::()), + ] + }; + + let bytes = self.send_iovec_all(&iovs[..], fds)?; + if bytes != mem::size_of::>() + mem::size_of::() { + return Err(Error::PartialMessage); + } + Ok(()) + } + + /// Reads bytes from the socket into the given scatter/gather vectors with optional attached + /// file descriptors. + /// + /// The underlying communication channel is a Unix domain socket in STREAM mode. It's a little + /// tricky to pass file descriptors through such a communication channel. Let's assume that a + /// sender sending a message with some file descriptors attached. To successfully receive those + /// attached file descriptors, the receiver must obey following rules: + /// 1) file descriptors are attached to a message. + /// 2) message(packet) boundaries must be respected on the receive side. + /// In other words, recvmsg() operations must not cross the packet boundary, otherwise the + /// attached file descriptors will get lost. + /// + /// # Return: + /// * - (number of bytes received, [received fds]) on success + /// * - SocketRetry: temporary error caused by signals or short of resources. + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. + pub fn recv_into_iovec(&mut self, iovs: &mut [iovec]) -> Result<(usize, Option>)> { + let mut fd_array = vec![0; MAX_ATTACHED_FD_ENTRIES]; + let (bytes, fds) = unsafe { self.sock.recv_with_fds(iovs, &mut fd_array)? 
}; + let rfds = match fds { + 0 => None, + n => { + let mut fds = Vec::with_capacity(n); + fds.extend_from_slice(&fd_array[0..n]); + Some(fds) + } + }; + + Ok((bytes, rfds)) + } + + /// Reads all bytes from the socket into the given scatter/gather vectors with optional + /// attached file descriptors. Will loop until all data has been transfered. + /// + /// The underlying communication channel is a Unix domain socket in STREAM mode. It's a little + /// tricky to pass file descriptors through such a communication channel. Let's assume that a + /// sender sending a message with some file descriptors attached. To successfully receive those + /// attached file descriptors, the receiver must obey following rules: + /// 1) file descriptors are attached to a message. + /// 2) message(packet) boundaries must be respected on the receive side. + /// In other words, recvmsg() operations must not cross the packet boundary, otherwise the + /// attached file descriptors will get lost. + /// + /// # Return: + /// * - (number of bytes received, [received fds]) on success + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. 
+ pub fn recv_into_iovec_all( + &mut self, + iovs: &mut [iovec], + ) -> Result<(usize, Option>)> { + let mut data_read = 0; + let mut data_total = 0; + let mut rfds = None; + let iov_lens: Vec = iovs.iter().map(|iov| iov.iov_len).collect(); + for len in &iov_lens { + data_total += len; + } + + while (data_total - data_read) > 0 { + let (nr_skip, offset) = get_sub_iovs_offset(&iov_lens, data_read); + let iov = &mut iovs[nr_skip]; + + let mut data = [ + &[iovec { + iov_base: (iov.iov_base as usize + offset) as *mut c_void, + iov_len: iov.iov_len - offset, + }], + &iovs[(nr_skip + 1)..], + ] + .concat(); + + let res = self.recv_into_iovec(&mut data); + match res { + Ok((0, _)) => return Ok((data_read, rfds)), + Ok((n, fds)) => { + if data_read == 0 { + rfds = fds; + } + data_read += n; + } + Err(e) => match e { + Error::SocketRetry(_) => {} + _ => return Err(e), + }, + } + } + Ok((data_read, rfds)) + } + + /// Receive a header-only message with optional attached file descriptors. + /// Note, only the first MAX_ATTACHED_FD_ENTRIES file descriptors will be + /// accepted and all other file descriptor will be discard silently. + /// + /// # Return: + /// * - (message header, [received fds]) on success. + /// * - SocketRetry: temporary error caused by signals or short of resources. + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. + /// * - PartialMessage: received a partial message. + /// * - InvalidMessage: received a invalid message. 
+ pub fn recv_header(&mut self) -> Result<(VhostUserMsgHeader, Option>)> { + let mut hdr = VhostUserMsgHeader::default(); + let mut iovs = [iovec { + iov_base: (&mut hdr as *mut VhostUserMsgHeader) as *mut c_void, + iov_len: mem::size_of::>(), + }]; + let (bytes, rfds) = self.recv_into_iovec_all(&mut iovs[..])?; + + if bytes != mem::size_of::>() { + return Err(Error::PartialMessage); + } else if !hdr.is_valid() { + return Err(Error::InvalidMessage); + } + + Ok((hdr, rfds)) + } + + /// Receive a message with optional attached file descriptors. + /// Note, only the first MAX_ATTACHED_FD_ENTRIES file descriptors will be + /// accepted and all other file descriptor will be discard silently. + /// + /// # Return: + /// * - (message header, message body, [received fds]) on success. + /// * - SocketRetry: temporary error caused by signals or short of resources. + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. + /// * - PartialMessage: received a partial message. + /// * - InvalidMessage: received a invalid message. + pub fn recv_body( + &mut self, + ) -> Result<(VhostUserMsgHeader, T, Option>)> { + let mut hdr = VhostUserMsgHeader::default(); + let mut body: T = Default::default(); + let mut iovs = [ + iovec { + iov_base: (&mut hdr as *mut VhostUserMsgHeader) as *mut c_void, + iov_len: mem::size_of::>(), + }, + iovec { + iov_base: (&mut body as *mut T) as *mut c_void, + iov_len: mem::size_of::(), + }, + ]; + let (bytes, rfds) = self.recv_into_iovec_all(&mut iovs[..])?; + + let total = mem::size_of::>() + mem::size_of::(); + if bytes != total { + return Err(Error::PartialMessage); + } else if !hdr.is_valid() || !body.is_valid() { + return Err(Error::InvalidMessage); + } + + Ok((hdr, body, rfds)) + } + + /// Send a message with header, body and payload. Optional file descriptors + /// may also be attached to the message. 
+ /// + /// # Return: + /// * - number of bytes sent on success + /// * - SocketRetry: temporary error caused by signals or short of resources. + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. + /// * - OversizedMsg: message size is too big. + /// * - PartialMessage: received a partial message. + /// * - IncorrectFds: wrong number of attached fds. + pub fn send_message_with_payload( + &mut self, + hdr: &VhostUserMsgHeader, + body: &T, + payload: &[P], + fds: Option<&[RawFd]>, + ) -> Result<()> { + let len = payload.len() * mem::size_of::

(); + if len > MAX_MSG_SIZE - mem::size_of::() { + return Err(Error::OversizedMsg); + } + if let Some(fd_arr) = fds { + if fd_arr.len() > MAX_ATTACHED_FD_ENTRIES { + return Err(Error::IncorrectFds); + } + } + + // Safe because there can't be other mutable reference to hdr, body and payload. + let iovs = unsafe { + [ + slice::from_raw_parts( + hdr as *const VhostUserMsgHeader as *const u8, + mem::size_of::>(), + ), + slice::from_raw_parts(body as *const T as *const u8, mem::size_of::()), + slice::from_raw_parts(payload.as_ptr() as *const u8, len), + ] + }; + let total = mem::size_of::>() + mem::size_of::() + len; + let len = self.send_iovec_all(&iovs, fds)?; + if len != total { + return Err(Error::PartialMessage); + } + Ok(()) + } + + /// Receive a message with optional payload and attached file descriptors. + /// Note, only the first MAX_ATTACHED_FD_ENTRIES file descriptors will be + /// accepted and all other file descriptor will be discard silently. + /// + /// # Return: + /// * - (message header, message body, size of payload, [received fds]) on success. + /// * - SocketRetry: temporary error caused by signals or short of resources. + /// * - SocketBroken: the underline socket is broken. + /// * - SocketError: other socket related errors. + /// * - PartialMessage: received a partial message. + /// * - InvalidMessage: received a invalid message. 
+ #[cfg_attr(feature = "cargo-clippy", allow(clippy::type_complexity))] + pub fn recv_payload_into_buf( + &mut self, + buf: &mut [u8], + ) -> Result<(VhostUserMsgHeader, T, usize, Option>)> { + let mut hdr = VhostUserMsgHeader::default(); + let mut body: T = Default::default(); + let mut iovs = [ + iovec { + iov_base: (&mut hdr as *mut VhostUserMsgHeader) as *mut c_void, + iov_len: mem::size_of::>(), + }, + iovec { + iov_base: (&mut body as *mut T) as *mut c_void, + iov_len: mem::size_of::(), + }, + iovec { + iov_base: buf.as_mut_ptr() as *mut c_void, + iov_len: buf.len(), + }, + ]; + let (bytes, rfds) = self.recv_into_iovec_all(&mut iovs[..])?; + + let total = mem::size_of::>() + mem::size_of::(); + if bytes < total { + return Err(Error::PartialMessage); + } else if !hdr.is_valid() || !body.is_valid() { + return Err(Error::InvalidMessage); + } + + Ok((hdr, body, bytes - total, rfds)) + } +} + +impl AsRawFd for Endpoint { + fn as_raw_fd(&self) -> RawFd { + self.sock.as_raw_fd() + } +} + +// Negotiate process from slave. 
+pub(crate) fn negotiate_slave( + slave: &mut Endpoint, + pfeatures: VhostUserProtocolFeatures, + use_ali_feature: bool, + has_protocol_mq: bool, + queue_num: u64, +) { + // set owner + let (hdr, rfds) = slave.recv_header().unwrap(); + assert_eq!(hdr.get_code(), MasterReq::SET_OWNER); + assert!(rfds.is_none()); + + // get features + let vfeatures = 0x15 | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(); + let hdr = VhostUserMsgHeader::new(MasterReq::GET_FEATURES, 0x4, 8); + let msg = VhostUserU64::new(vfeatures); + slave.send_message(&hdr, &msg, None).unwrap(); + let (hdr, _rfds) = slave.recv_header().unwrap(); + assert_eq!(hdr.get_code(), MasterReq::GET_FEATURES); + + // set features + let (hdr, _msg, rfds) = slave.recv_body::().unwrap(); + assert_eq!(hdr.get_code(), MasterReq::SET_FEATURES); + assert!(rfds.is_none()); + + // get vhost-user protocol features + let code = MasterReq::GET_PROTOCOL_FEATURES; + let (hdr, rfds) = slave.recv_header().unwrap(); + assert_eq!(hdr.get_code(), code); + assert!(rfds.is_none()); + let hdr = VhostUserMsgHeader::new(code, 0x4, 8); + let msg = VhostUserU64::new(pfeatures.bits()); + slave.send_message(&hdr, &msg, None).unwrap(); + + // set vhost-user protocol features + let (hdr, _msg, rfds) = slave.recv_body::().unwrap(); + assert_eq!(hdr.get_code(), MasterReq::SET_PROTOCOL_FEATURES); + assert!(rfds.is_none()); + + // set number of queues + if has_protocol_mq { + let (hdr, rfds) = slave.recv_header().unwrap(); + assert_eq!(hdr.get_code(), MasterReq::GET_QUEUE_NUM); + assert!(rfds.is_none()); + let hdr = VhostUserMsgHeader::new(MasterReq::GET_QUEUE_NUM, 0x4, 8); + let msg = VhostUserU64::new(queue_num); + slave.send_message(&hdr, &msg, None).unwrap(); + } + + // set vring call + for _i in 0..queue_num { + let (hdr, _msg, rfds) = slave.recv_body::().unwrap(); + assert_eq!(hdr.get_code(), MasterReq::SET_VRING_CALL); + assert!(rfds.is_some()); + } + + // set mem table + let mut region_buf: Vec = vec![0u8; mem::size_of::()]; + let 
(hdr, _msg, _payload, rfds) = slave + .recv_payload_into_buf::(&mut region_buf) + .unwrap(); + assert_eq!(hdr.get_code(), MasterReq::SET_MEM_TABLE); + assert!(rfds.is_some()); + + if pfeatures.contains(VhostUserProtocolFeatures::INFLIGHT_SHMFD) { + // get inflight fd + let (hdr, _msg, rfds) = slave.recv_body::().unwrap(); + assert_eq!(hdr.get_code(), MasterReq::GET_INFLIGHT_FD); + assert!(rfds.is_none()); + let msg = VhostUserInflight { + mmap_size: 0x100, + mmap_offset: 0x0, + ..Default::default() + }; + let inflight_file = TempFile::new().unwrap().into_file(); + inflight_file.set_len(0x100).unwrap(); + let fds = [inflight_file.as_raw_fd()]; + let hdr = VhostUserMsgHeader::new( + MasterReq::GET_INFLIGHT_FD, + VhostUserHeaderFlag::REPLY.bits(), + std::mem::size_of::() as u32, + ); + slave.send_message(&hdr, &msg, Some(&fds)).unwrap(); + + // set inflight fd + let (hdr, _msg, rfds) = slave.recv_body::().unwrap(); + assert_eq!(hdr.get_code(), MasterReq::SET_INFLIGHT_FD); + assert!(rfds.is_some()); + let hdr = VhostUserMsgHeader::new( + MasterReq::GET_INFLIGHT_FD, + VhostUserHeaderFlag::REPLY.bits(), + std::mem::size_of::() as u32, + ); + slave.send_header(&hdr, None).unwrap(); + } + + // set vring num + for _i in 0..queue_num { + let (hdr, _msg, rfds) = slave.recv_body::().unwrap(); + slave.send_header(&hdr, None).unwrap(); + assert!(rfds.is_none()); + } + + // set vring base + for _i in 0..queue_num { + let (hdr, _msg, rfds) = slave.recv_body::().unwrap(); + assert_eq!(hdr.get_code(), MasterReq::SET_VRING_BASE); + assert!(rfds.is_none()); + slave.send_header(&hdr, None).unwrap(); + } + + // set vring addr + for _i in 0..queue_num { + let (hdr, _msg, rfds) = slave.recv_body::().unwrap(); + assert_eq!(hdr.get_code(), MasterReq::SET_VRING_ADDR); + assert!(rfds.is_none()); + slave.send_header(&hdr, None).unwrap(); + } + + // set vring kick + for _i in 0..queue_num { + let (hdr, _msg, rfds) = slave.recv_body::().unwrap(); + assert_eq!(hdr.get_code(), 
MasterReq::SET_VRING_KICK); + assert!(rfds.is_some()); + } + + // set vring call + for _i in 0..queue_num { + let (hdr, _msg, rfds) = slave.recv_body::().unwrap(); + assert_eq!(hdr.get_code(), MasterReq::SET_VRING_CALL); + assert!(rfds.is_some()); + } + + // set vring enable + for _i in 0..queue_num { + let (hdr, _msg, rfds) = slave.recv_body::().unwrap(); + assert_eq!(hdr.get_code(), MasterReq::SET_VRING_ENABLE); + assert!(rfds.is_none()); + slave.send_header(&hdr, None).unwrap(); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/vsock/epoll_handler.rs b/src/dragonball/src/dbs_virtio_devices/src/vsock/epoll_handler.rs index 5ffcd23c5595..62e98e74a55c 100644 --- a/src/dragonball/src/dbs_virtio_devices/src/vsock/epoll_handler.rs +++ b/src/dragonball/src/dbs_virtio_devices/src/vsock/epoll_handler.rs @@ -18,7 +18,7 @@ use super::defs; use super::muxer::{VsockGenericMuxer, VsockMuxer}; use super::packet::VsockPacket; use crate::device::VirtioDeviceConfig; -use crate::{DbsGuestAddressSpace, Result as VirtIoResult}; +use crate::{DbsGuestAddressSpace, Result as VirtioResult}; const QUEUE_RX: usize = 0; const QUEUE_TX: usize = 1; @@ -83,7 +83,7 @@ where /// Signal the guest driver that we've used some virtio buffers that it had /// previously made available. 
- pub(crate) fn signal_used_queue(&self, idx: usize) -> VirtIoResult<()> { + pub(crate) fn signal_used_queue(&self, idx: usize) -> VirtioResult<()> { trace!("{}: raising IRQ", self.id); self.config.queues[idx].notify().map_err(|e| { error!("{}: failed to signal used queue {}, {:?}", self.id, idx, e); diff --git a/src/dragonball/src/device_manager/balloon_dev_mgr.rs b/src/dragonball/src/device_manager/balloon_dev_mgr.rs index 329966adf4a5..97502bea3f4c 100644 --- a/src/dragonball/src/device_manager/balloon_dev_mgr.rs +++ b/src/dragonball/src/device_manager/balloon_dev_mgr.rs @@ -5,7 +5,7 @@ use dbs_virtio_devices as virtio; use serde_derive::{Deserialize, Serialize}; use slog::{error, info}; use virtio::balloon::{Balloon, BalloonConfig}; -use virtio::Error as VirtIoError; +use virtio::Error as VirtioError; use crate::address_space_manager::GuestAddressSpaceImpl; use crate::config_manager::{ConfigItem, DeviceConfigInfo, DeviceConfigInfos}; @@ -51,7 +51,7 @@ pub enum BalloonDeviceError { /// resize balloon device error #[error("failure while resizing virtio-balloon device, {0}")] - ResizeFailed(#[source] VirtIoError), + ResizeFailed(#[source] VirtioError), /// The balloon device id doesn't exist. #[error("invalid balloon device id '{0}'")] diff --git a/src/dragonball/src/device_manager/blk_dev_mgr.rs b/src/dragonball/src/device_manager/blk_dev_mgr.rs index 854edfc0977a..3d664abbe01c 100644 --- a/src/dragonball/src/device_manager/blk_dev_mgr.rs +++ b/src/dragonball/src/device_manager/blk_dev_mgr.rs @@ -45,7 +45,7 @@ macro_rules! error( }; ); -/// Default queue size for VirtIo block devices. +/// Default queue size for Virtio block devices. pub const QUEUE_SIZE: u16 = 128; /// Errors associated with the operations allowed on a drive. 
diff --git a/src/dragonball/src/device_manager/fs_dev_mgr.rs b/src/dragonball/src/device_manager/fs_dev_mgr.rs index dca0e649e35d..771f2b924511 100644 --- a/src/dragonball/src/device_manager/fs_dev_mgr.rs +++ b/src/dragonball/src/device_manager/fs_dev_mgr.rs @@ -6,7 +6,7 @@ use std::convert::TryInto; use dbs_utils::epoll_manager::EpollManager; -use dbs_virtio_devices::{self as virtio, Error as VirtIoError}; +use dbs_virtio_devices::{self as virtio, Error as VirtioError}; use serde_derive::{Deserialize, Serialize}; use slog::{error, info}; @@ -77,7 +77,7 @@ pub enum FsDeviceError { /// Creating a shared-fs device fails (if the vhost-user socket cannot be open.) #[error("cannot create shared-fs device: {0}")] - CreateFsDevice(#[source] VirtIoError), + CreateFsDevice(#[source] VirtioError), /// Cannot initialize a shared-fs device or add a device to the MMIO Bus. #[error("failure while registering shared-fs device: {0}")] diff --git a/src/dragonball/src/device_manager/mem_dev_mgr.rs b/src/dragonball/src/device_manager/mem_dev_mgr.rs index 2bb68ae80f9d..891149f25a38 100644 --- a/src/dragonball/src/device_manager/mem_dev_mgr.rs +++ b/src/dragonball/src/device_manager/mem_dev_mgr.rs @@ -15,7 +15,7 @@ use nix::sys::mman; use serde_derive::{Deserialize, Serialize}; use slog::{debug, error, info, warn}; use virtio::mem::{Mem, MemRegionFactory}; -use virtio::Error as VirtIoError; +use virtio::Error as VirtioError; use vm_memory::{ Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestRegionMmap, GuestUsize, MmapRegion, }; @@ -61,7 +61,7 @@ pub enum MemDeviceError { /// resize mem device error #[error("failure while resizing virtio-mem device, {0}")] - ResizeFailed(#[source] VirtIoError), + ResizeFailed(#[source] VirtioError), /// mem device does not exist #[error("mem device does not exist")] @@ -389,7 +389,7 @@ impl MemoryRegionFactory { }) } - fn configure_anon_mem(&self, mmap_reg: &MmapRegion) -> Result<(), VirtIoError> { + fn configure_anon_mem(&self, mmap_reg: 
&MmapRegion) -> Result<(), VirtioError> { unsafe { mman::madvise( mmap_reg.as_ptr() as *mut libc::c_void, @@ -397,15 +397,15 @@ impl MemoryRegionFactory { mman::MmapAdvise::MADV_DONTFORK, ) } - .map_err(VirtIoError::Madvise)?; + .map_err(VirtioError::Madvise)?; Ok(()) } - fn configure_numa(&self, mmap_reg: &MmapRegion, node_id: u32) -> Result<(), VirtIoError> { + fn configure_numa(&self, mmap_reg: &MmapRegion, node_id: u32) -> Result<(), VirtioError> { let nodemask = 1_u64 .checked_shl(node_id) - .ok_or(VirtIoError::InvalidInput)?; + .ok_or(VirtioError::InvalidInput)?; let res = unsafe { libc::syscall( libc::SYS_mbind, @@ -428,7 +428,7 @@ impl MemoryRegionFactory { Ok(()) } - fn configure_thp(&mut self, mmap_reg: &MmapRegion) -> Result<(), VirtIoError> { + fn configure_thp(&mut self, mmap_reg: &MmapRegion) -> Result<(), VirtioError> { debug!( self.logger, "Setting MADV_HUGEPAGE on AddressSpaceRegion addr {:x?} len {:x?}", @@ -445,7 +445,7 @@ impl MemoryRegionFactory { mman::MmapAdvise::MADV_HUGEPAGE, ) } - .map_err(VirtIoError::Madvise)?; + .map_err(VirtioError::Madvise)?; Ok(()) } @@ -455,7 +455,7 @@ impl MemoryRegionFactory { slot: u32, reg: &Arc, mmap_reg: &MmapRegion, - ) -> Result<(), VirtIoError> { + ) -> Result<(), VirtioError> { let host_addr = mmap_reg.as_ptr() as u64; let flags = 0u32; @@ -471,7 +471,7 @@ impl MemoryRegionFactory { // Safe because the user mem region is just created, and kvm slot is allocated // by resource allocator. 
unsafe { self.vm_fd.set_user_memory_region(mem_region) } - .map_err(VirtIoError::SetUserMemoryRegion)?; + .map_err(VirtioError::SetUserMemoryRegion)?; Ok(()) } @@ -483,7 +483,7 @@ impl MemRegionFactory for MemoryRegionFactory { guest_addr: GuestAddress, region_len: GuestUsize, kvm_slot: u32, - ) -> std::result::Result, VirtIoError> { + ) -> std::result::Result, VirtioError> { // create address space region let mem_type = self.vm_config.mem_type.as_str(); let mut mem_file_path = self.vm_config.mem_file_path.clone(); @@ -507,7 +507,7 @@ impl MemRegionFactory for MemoryRegionFactory { error!(self.logger, "failed to insert address space region: {}", e); // dbs-virtio-devices should not depend on dbs-address-space. // So here io::Error is used instead of AddressSpaceError directly. - VirtIoError::IOError(io::Error::new( + VirtioError::IOError(io::Error::new( io::ErrorKind::Other, format!( "invalid address space region ({0:#x}, {1:#x})", @@ -532,7 +532,7 @@ impl MemRegionFactory for MemoryRegionFactory { region.prot_flags(), region.perm_flags(), ) - .map_err(VirtIoError::NewMmapRegion)?; + .map_err(VirtioError::NewMmapRegion)?; let host_addr: u64 = mmap_region.as_ptr() as u64; // thp @@ -561,20 +561,20 @@ impl MemRegionFactory for MemoryRegionFactory { // All value should be valid. let memory_region = Arc::new( - GuestRegionMmap::new(mmap_region, guest_addr).map_err(VirtIoError::InsertMmap)?, + GuestRegionMmap::new(mmap_region, guest_addr).map_err(VirtioError::InsertMmap)?, ); let vm_as_new = self .vm_as .memory() .insert_region(memory_region.clone()) - .map_err(VirtIoError::InsertMmap)?; + .map_err(VirtioError::InsertMmap)?; self.vm_as.lock().unwrap().replace(vm_as_new); self.address_space.insert_region(region).map_err(|e| { error!(self.logger, "failed to insert address space region: {}", e); // dbs-virtio-devices should not depend on dbs-address-space. // So here io::Error is used instead of AddressSpaceError directly. 
- VirtIoError::IOError(io::Error::new( + VirtioError::IOError(io::Error::new( io::ErrorKind::Other, format!( "invalid address space region ({0:#x}, {1:#x})", @@ -589,7 +589,7 @@ impl MemRegionFactory for MemoryRegionFactory { fn restore_region_addr( &self, guest_addr: GuestAddress, - ) -> std::result::Result<*mut u8, VirtIoError> { + ) -> std::result::Result<*mut u8, VirtioError> { let memory = self.vm_as.memory(); // NOTE: We can't clone `GuestRegionMmap` reference directly!!! // @@ -604,7 +604,7 @@ impl MemRegionFactory for MemoryRegionFactory { // a memory exception! memory .get_host_address(guest_addr) - .map_err(VirtIoError::GuestMemory) + .map_err(VirtioError::GuestMemory) } fn get_host_numa_node_id(&self) -> Option { diff --git a/src/dragonball/src/device_manager/memory_region_handler.rs b/src/dragonball/src/device_manager/memory_region_handler.rs index fbf5aa20cbf0..baa35d1f7c35 100644 --- a/src/dragonball/src/device_manager/memory_region_handler.rs +++ b/src/dragonball/src/device_manager/memory_region_handler.rs @@ -6,7 +6,7 @@ use std::io; use std::sync::Arc; use dbs_address_space::{AddressSpace, AddressSpaceRegion, AddressSpaceRegionType}; -use dbs_virtio_devices::{Error as VirtIoError, VirtioRegionHandler}; +use dbs_virtio_devices::{Error as VirtioError, VirtioRegionHandler}; use log::{debug, error}; use vm_memory::{FileOffset, GuestAddressSpace, GuestMemoryRegion, GuestRegionMmap}; @@ -41,7 +41,7 @@ impl DeviceVirtioRegionHandler { fn insert_address_space( &mut self, region: Arc, - ) -> std::result::Result<(), VirtIoError> { + ) -> std::result::Result<(), VirtioError> { let file_offset = match region.file_offset() { // TODO: use from_arc Some(f) => Some(FileOffset::new(f.file().try_clone()?, 0)), @@ -63,7 +63,7 @@ impl DeviceVirtioRegionHandler { error!("inserting address apace error: {}", e); // dbs-virtio-devices should not depend on dbs-address-space. // So here io::Error is used instead of AddressSpaceError directly. 
- VirtIoError::IOError(io::Error::new( + VirtioError::IOError(io::Error::new( io::ErrorKind::Other, format!( "invalid address space region ({0:#x}, {1:#x})", @@ -78,13 +78,13 @@ impl DeviceVirtioRegionHandler { fn insert_vm_as( &mut self, region: Arc, - ) -> std::result::Result<(), VirtIoError> { + ) -> std::result::Result<(), VirtioError> { let vm_as_new = self.vm_as.memory().insert_region(region).map_err(|e| { error!( "DeviceVirtioRegionHandler failed to insert guest memory region: {:?}.", e ); - VirtIoError::InsertMmap(e) + VirtioError::InsertMmap(e) })?; // Do not expect poisoned lock here, so safe to unwrap(). self.vm_as.lock().unwrap().replace(vm_as_new); @@ -97,7 +97,7 @@ impl VirtioRegionHandler for DeviceVirtioRegionHandler { fn insert_region( &mut self, region: Arc, - ) -> std::result::Result<(), VirtIoError> { + ) -> std::result::Result<(), VirtioError> { debug!( "add geust memory region to address_space/vm_as, new region: {:?}", region diff --git a/src/dragonball/src/error.rs b/src/dragonball/src/error.rs index 35b92244f8a1..126c1dd9fa9f 100644 --- a/src/dragonball/src/error.rs +++ b/src/dragonball/src/error.rs @@ -12,7 +12,7 @@ #[cfg(target_arch = "aarch64")] use dbs_arch::pmu::PmuError; #[cfg(feature = "dbs-virtio-devices")] -use dbs_virtio_devices::Error as VirtIoError; +use dbs_virtio_devices::Error as VirtioError; use crate::{address_space_manager, device_manager, resource_manager, vcpu, vm}; @@ -149,7 +149,7 @@ pub enum StartMicroVmError { #[cfg(feature = "virtio-vsock")] /// Failed to create the vsock device. #[error("cannot create virtio-vsock device: {0}")] - CreateVsockDevice(#[source] VirtIoError), + CreateVsockDevice(#[source] VirtioError), #[cfg(feature = "virtio-vsock")] /// Cannot initialize a MMIO Vsock Device or add a device to the MMIO Bus. @@ -241,5 +241,5 @@ pub enum EpollError { #[cfg(feature = "dbs-virtio-devices")] /// Errors from virtio devices. 
#[error("failed to manager Virtio device: {0}")] - VirtIoDevice(#[source] VirtIoError), + VirtioDevice(#[source] VirtioError), } diff --git a/src/dragonball/src/vm/x86_64.rs b/src/dragonball/src/vm/x86_64.rs index 9593e82761ef..4aedeafd8eef 100644 --- a/src/dragonball/src/vm/x86_64.rs +++ b/src/dragonball/src/vm/x86_64.rs @@ -59,7 +59,7 @@ fn configure_system( let himem_start = GuestAddress(layout::HIMEM_START); // Note that this puts the mptable at the last 1k of Linux's 640k base RAM - mptable::setup_mptable(guest_mem, boot_cpus, max_cpus).map_err(Error::MpTableSetup)?; + mptable::setup_mptable(guest_mem, boot_cpus, max_cpus, None).map_err(Error::MpTableSetup)?; let mut params: BootParamsWrapper = BootParamsWrapper(bootparam::boot_params::default()); diff --git a/src/libs/kata-sys-util/src/protection.rs b/src/libs/kata-sys-util/src/protection.rs index aecb5837d4c0..51352a9d458d 100644 --- a/src/libs/kata-sys-util/src/protection.rs +++ b/src/libs/kata-sys-util/src/protection.rs @@ -152,7 +152,7 @@ pub fn arch_guest_protection( let major_version_str = major_version_str.trim_start_matches(HEX_PREFIX); - let major_version = u32::from_str_radix(&major_version_str, HEX_BASE) + let major_version = u32::from_str_radix(major_version_str, HEX_BASE) .map_err(|e| ProtectionError::FileInvalid(major_file, anyhow!(e)))?; let minor_version_str = std::fs::read_to_string(minor_file.clone()).map_err(|e| { @@ -161,7 +161,7 @@ pub fn arch_guest_protection( let minor_version_str = minor_version_str.trim_start_matches(HEX_PREFIX); - let minor_version = u32::from_str_radix(&minor_version_str, HEX_BASE) + let minor_version = u32::from_str_radix(minor_version_str, HEX_BASE) .map_err(|e| ProtectionError::FileInvalid(minor_file, anyhow!(e)))?; let details = TDXDetails { diff --git a/src/libs/kata-types/src/config/shared_mount.rs b/src/libs/kata-types/src/config/shared_mount.rs index 2370ba81aab2..3dd2d7f41954 100644 --- a/src/libs/kata-types/src/config/shared_mount.rs +++ 
b/src/libs/kata-types/src/config/shared_mount.rs @@ -36,19 +36,19 @@ pub struct SharedMount { impl SharedMount { pub fn validate(&self) -> Result<()> { - if self.name == "" { + if self.name.is_empty() { return Err(eother!("shared_mount: field 'name' couldn't be empty.")); } - if self.src_ctr == "" { + if self.src_ctr.is_empty() { return Err(eother!("shared_mount: field 'src_ctr' couldn't be empty.")); } - if self.dst_ctr == "" { + if self.dst_ctr.is_empty() { return Err(eother!("shared_mount: field 'dst_ctr' couldn't be empty.")); } - if self.src_path == "" { + if self.src_path.is_empty() { return Err(eother!("shared_mount: field 'src_path' couldn't be empty.")); } - if self.dst_path == "" { + if self.dst_path.is_empty() { return Err(eother!("shared_mount: field 'dst_path' couldn't be empty.")); } diff --git a/src/runtime-rs/crates/hypervisor/src/ch/inner_device.rs b/src/runtime-rs/crates/hypervisor/src/ch/inner_device.rs index a5a918949002..985edd978981 100644 --- a/src/runtime-rs/crates/hypervisor/src/ch/inner_device.rs +++ b/src/runtime-rs/crates/hypervisor/src/ch/inner_device.rs @@ -10,8 +10,8 @@ use crate::BlockDevice; use crate::HybridVsockDevice; use crate::NetworkConfig; use crate::PciPath; +use crate::ShareFsConfig; use crate::ShareFsDevice; -use crate::ShareFsDeviceConfig; use crate::VfioDevice; use crate::VmmState; use anyhow::{anyhow, Context, Result}; @@ -108,6 +108,10 @@ impl CloudHypervisorInner { } } + pub(crate) async fn update_device(&mut self, _device: DeviceType) -> Result<()> { + Ok(()) + } + async fn handle_share_fs_device(&mut self, sharefs: ShareFsDevice) -> Result { let device: ShareFsDevice = sharefs.clone(); if device.config.fs_type != VIRTIO_FS { @@ -363,12 +367,12 @@ impl TryFrom for NetConfig { } #[derive(Debug)] pub struct ShareFsSettings { - cfg: ShareFsDeviceConfig, + cfg: ShareFsConfig, vm_path: String, } impl ShareFsSettings { - pub fn new(cfg: ShareFsDeviceConfig, vm_path: String) -> Self { + pub fn new(cfg: ShareFsConfig, 
vm_path: String) -> Self { ShareFsSettings { cfg, vm_path } } } diff --git a/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs b/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs index 0eca4cd2ecca..e10a557e0678 100644 --- a/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs +++ b/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs @@ -140,13 +140,19 @@ impl CloudHypervisorInner { // Start by adding the default set of kernel parameters. let mut params = KernelParams::new(enable_debug); + #[cfg(target_arch = "x86_64")] + let console_param_debug = KernelParams::from_string("console=ttyS0,115200n8"); + + #[cfg(target_arch = "aarch64")] + let console_param_debug = KernelParams::from_string("console=ttyAMA0,115200n8"); + let mut rootfs_param = KernelParams::new_rootfs_kernel_params(rootfs_driver, rootfs_type)?; let mut console_params = if enable_debug { if confidential_guest { KernelParams::from_string("console=hvc0") } else { - KernelParams::from_string("console=ttyS0,115200n8") + console_param_debug } } else { KernelParams::from_string("quiet") diff --git a/src/runtime-rs/crates/hypervisor/src/ch/mod.rs b/src/runtime-rs/crates/hypervisor/src/ch/mod.rs index 37f52d11cbd7..6f8b6c269dcd 100644 --- a/src/runtime-rs/crates/hypervisor/src/ch/mod.rs +++ b/src/runtime-rs/crates/hypervisor/src/ch/mod.rs @@ -89,6 +89,11 @@ impl Hypervisor for CloudHypervisor { inner.remove_device(device).await } + async fn update_device(&self, device: DeviceType) -> Result<()> { + let mut inner = self.inner.write().await; + inner.update_device(device).await + } + async fn get_agent_socket(&self) -> Result { let inner = self.inner.write().await; inner.get_agent_socket().await diff --git a/src/runtime-rs/crates/hypervisor/src/device/device_manager.rs b/src/runtime-rs/crates/hypervisor/src/device/device_manager.rs index 4d14dea88ff0..8d71ecbccd1d 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/device_manager.rs +++ 
b/src/runtime-rs/crates/hypervisor/src/device/device_manager.rs @@ -12,8 +12,8 @@ use tokio::sync::{Mutex, RwLock}; use crate::{ vhost_user_blk::VhostUserBlkDevice, BlockConfig, BlockDevice, HybridVsockDevice, Hypervisor, - NetworkDevice, VfioDevice, VhostUserConfig, KATA_BLK_DEV_TYPE, KATA_MMIO_BLK_DEV_TYPE, - KATA_NVDIMM_DEV_TYPE, VIRTIO_BLOCK_MMIO, VIRTIO_BLOCK_PCI, VIRTIO_PMEM, + NetworkDevice, ShareFsDevice, VfioDevice, VhostUserConfig, KATA_BLK_DEV_TYPE, + KATA_MMIO_BLK_DEV_TYPE, KATA_NVDIMM_DEV_TYPE, VIRTIO_BLOCK_MMIO, VIRTIO_BLOCK_PCI, VIRTIO_PMEM, }; use super::{ @@ -226,6 +226,11 @@ impl DeviceManager { return Some(device_id.to_string()); } } + DeviceType::ShareFs(device) => { + if device.config.host_shared_path == host_path { + return Some(device_id.to_string()); + } + } _ => { // TODO: support find other device type continue; @@ -325,6 +330,22 @@ impl DeviceManager { // No need to do find device for hybrid vsock device. Arc::new(Mutex::new(HybridVsockDevice::new(&device_id, hvconfig))) } + DeviceConfig::ShareFsCfg(config) => { + // Try to find the sharefs device. If found, just return matched device id. + if let Some(device_id_matched) = + self.find_device(config.host_shared_path.clone()).await + { + info!( + sl!(), + "share-fs device with path:{:?} found, device id: {:?}", + config.host_shared_path, + device_id_matched + ); + return Ok(device_id_matched); + } + + Arc::new(Mutex::new(ShareFsDevice::new(&device_id, config))) + } _ => { return Err(anyhow!("invliad device type")); } @@ -437,6 +458,62 @@ impl DeviceManager { Err(anyhow!("ID are exhausted")) } + + async fn try_update_device(&mut self, updated_config: &DeviceConfig) -> Result<()> { + let device_id = match updated_config { + DeviceConfig::ShareFsCfg(config) => { + // Try to find the sharefs device. + // If found, just return the matched device id, otherwise return an error. 
+ if let Some(device_id_matched) = + self.find_device(config.host_shared_path.clone()).await + { + device_id_matched + } else { + return Err(anyhow!( + "no matching device was found to do the update operation" + )); + } + } + // TODO for other Device Type + _ => { + return Err(anyhow!("update device with unsupported device type")); + } + }; + + // get the original device + let target_device = self + .get_device_info(&device_id) + .await + .context("get device failed")?; + + // update device with the updated configuration. + let updated_device: ArcMutexDevice = match target_device { + DeviceType::ShareFs(mut device) => { + if let DeviceConfig::ShareFsCfg(config) = updated_config { + // update the mount_config. + device.config.mount_config = config.mount_config.clone(); + } + Arc::new(Mutex::new(device)) + } + _ => return Err(anyhow!("update unsupported device type")), + }; + + // do handle update + if let Err(e) = updated_device + .lock() + .await + .update(self.hypervisor.as_ref()) + .await + { + debug!(sl!(), "update device with device id: {:?}", &device_id); + return Err(e); + } + + // Finally, we update the Map in Device Manager + self.devices.insert(device_id, updated_device); + + Ok(()) + } } // Many scenarios have similar steps when adding devices. 
so to reduce duplicated code, @@ -472,6 +549,19 @@ pub async fn do_handle_device( Ok(device_info) } +pub async fn do_update_device( + d: &RwLock, + updated_config: &DeviceConfig, +) -> Result<()> { + d.write() + .await + .try_update_device(updated_config) + .await + .context("failed to update device")?; + + Ok(()) +} + pub async fn get_block_driver(d: &RwLock) -> String { d.read().await.get_block_driver().await } diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/mod.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/mod.rs index 0818d7cec2c9..c8eb70e56404 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/mod.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/mod.rs @@ -20,8 +20,7 @@ pub use virtio_blk::{ VIRTIO_BLOCK_MMIO, VIRTIO_BLOCK_PCI, VIRTIO_PMEM, }; pub use virtio_fs::{ - ShareFsDevice, ShareFsDeviceConfig, ShareFsMountConfig, ShareFsMountDevice, ShareFsMountType, - ShareFsOperation, + ShareFsConfig, ShareFsDevice, ShareFsMountConfig, ShareFsMountOperation, ShareFsMountType, }; pub use virtio_net::{Address, Backend, NetworkConfig, NetworkDevice}; pub use virtio_vsock::{ diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs index a689bcb355f3..07890de659c6 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs @@ -543,6 +543,11 @@ impl Device for VfioDevice { Ok(device_index) } + async fn update(&mut self, _h: &dyn hypervisor) -> Result<()> { + // There's no need to do update for vfio device + Ok(()) + } + async fn increase_attach_count(&mut self) -> Result { match self.attach_count { 0 => { diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user_blk.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user_blk.rs index 0912f89f1ab9..5150f195634a 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user_blk.rs +++ 
b/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user_blk.rs @@ -86,6 +86,11 @@ impl Device for VhostUserBlkDevice { Ok(Some(self.config.index)) } + async fn update(&mut self, _h: &dyn hypervisor) -> Result<()> { + // There's no need to do update for vhost-user-blk + Ok(()) + } + async fn get_device_info(&self) -> DeviceType { DeviceType::VhostUserBlk(self.clone()) } diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs index 1de3ff3897e6..a93f8553d95a 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs @@ -114,6 +114,11 @@ impl Device for BlockDevice { Ok(Some(self.config.index)) } + async fn update(&mut self, _h: &dyn hypervisor) -> Result<()> { + // There's no need to do update for virtio-blk + Ok(()) + } + async fn get_device_info(&self) -> DeviceType { DeviceType::Block(self.clone()) } diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_fs.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_fs.rs index 0a97845e71d0..e968606de35c 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_fs.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_fs.rs @@ -4,21 +4,28 @@ // SPDX-License-Identifier: Apache-2.0 // -#[derive(Copy, Clone, Debug)] -pub enum ShareFsOperation { +use anyhow::{Context, Result}; +use async_trait::async_trait; + +use crate::device::{hypervisor, Device, DeviceType}; + +#[derive(Copy, Clone, Debug, Default)] +pub enum ShareFsMountOperation { + #[default] Mount, Umount, Update, } -#[derive(Debug, Clone)] +#[derive(Debug, Default, Clone)] pub enum ShareFsMountType { + #[default] PASSTHROUGH, RAFS, } /// ShareFsMountConfig: share fs mount config -#[derive(Debug, Clone)] +#[derive(Clone, Debug, Default)] pub struct ShareFsMountConfig { /// source: the passthrough fs exported dir or rafs meta file of 
rafs pub source: String, @@ -36,20 +43,19 @@ pub struct ShareFsMountConfig { pub tag: String, /// op: the operation to take, e.g. mount, umount or update - pub op: ShareFsOperation, + pub op: ShareFsMountOperation, /// prefetch_list_path: path to file that contains file lists that should be prefetched by rafs pub prefetch_list_path: Option, } -#[derive(Debug, Clone)] -pub struct ShareFsMountDevice { - pub config: ShareFsMountConfig, -} +/// ShareFsConfig: Sharefs config for virtio-fs devices and their corresponding mount configurations, +/// facilitating mount/umount/update operations. +#[derive(Clone, Debug, Default)] +pub struct ShareFsConfig { + /// host_shared_path: the upperdir of the passthrough fs exported dir or rafs meta file of rafs + pub host_shared_path: String, -/// ShareFsDeviceConfig: share fs device config -#[derive(Debug, Clone)] -pub struct ShareFsDeviceConfig { /// fs_type: virtiofs or inline-virtiofs pub fs_type: String, @@ -59,9 +65,6 @@ pub struct ShareFsDeviceConfig { /// mount_tag: a label used as a hint to the guest. pub mount_tag: String, - /// host_path: the host filesystem path for this volume. - pub host_path: String, - /// queue_size: queue size pub queue_size: u64, @@ -70,9 +73,65 @@ pub struct ShareFsDeviceConfig { /// options: virtiofs device's config options. 
pub options: Vec, + + /// mount config for sharefs mount/umount/update + pub mount_config: Option, } -#[derive(Debug, Clone)] +#[derive(Debug, Default, Clone)] pub struct ShareFsDevice { - pub config: ShareFsDeviceConfig, + /// device id for sharefs device in device manager + pub device_id: String, + + /// config for sharefs device + pub config: ShareFsConfig, +} + +impl ShareFsDevice { + // new creates a share-fs device + pub fn new(device_id: &str, config: &ShareFsConfig) -> Self { + Self { + device_id: device_id.to_string(), + config: config.clone(), + } + } +} + +#[async_trait] +impl Device for ShareFsDevice { + async fn attach(&mut self, h: &dyn hypervisor) -> Result<()> { + h.add_device(DeviceType::ShareFs(self.clone())) + .await + .context("add share-fs device.")?; + + Ok(()) + } + + async fn detach(&mut self, _h: &dyn hypervisor) -> Result> { + // no need to detach share-fs device + + Ok(None) + } + + async fn update(&mut self, h: &dyn hypervisor) -> Result<()> { + h.update_device(DeviceType::ShareFs(self.clone())) + .await + .context("update share-fs device.") + } + + async fn get_device_info(&self) -> DeviceType { + DeviceType::ShareFs(self.clone()) + } + + async fn increase_attach_count(&mut self) -> Result { + // share-fs devices will not be attached multiple times, Just return Ok(false) + + Ok(false) + } + + async fn decrease_attach_count(&mut self) -> Result { + // share-fs devices will not be detached multiple times, Just return Ok(false) + + Ok(false) + } } diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_net.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_net.rs index c05503ab53ee..eac28d81d90e 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_net.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_net.rs @@ -95,6 +95,11 @@ impl Device for NetworkDevice { Ok(Some(self.config.index)) } + async fn update(&mut self, _h: &dyn hypervisor) -> Result<()> { + // There's no need to do 
update for network device + Ok(()) + } + async fn get_device_info(&self) -> DeviceType { DeviceType::Network(self.clone()) } diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_vsock.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_vsock.rs index 98f48e29a98b..6ca88795363c 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_vsock.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_vsock.rs @@ -62,6 +62,11 @@ impl Device for HybridVsockDevice { Ok(None) } + async fn update(&mut self, _h: &dyn hypervisor) -> Result<()> { + // There's no need to do update for hvsock device + Ok(()) + } + async fn get_device_info(&self) -> DeviceType { DeviceType::HybridVsock(self.clone()) } diff --git a/src/runtime-rs/crates/hypervisor/src/device/mod.rs b/src/runtime-rs/crates/hypervisor/src/device/mod.rs index 59bb7540d8d1..b40f784ddb27 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/mod.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/mod.rs @@ -9,8 +9,8 @@ use std::fmt; use crate::device::driver::vhost_user_blk::VhostUserBlkDevice; use crate::{ BlockConfig, BlockDevice, HybridVsockConfig, HybridVsockDevice, Hypervisor as hypervisor, - NetworkConfig, NetworkDevice, ShareFsDevice, ShareFsDeviceConfig, ShareFsMountConfig, - ShareFsMountDevice, VfioConfig, VfioDevice, VhostUserConfig, VsockConfig, + NetworkConfig, NetworkDevice, ShareFsConfig, ShareFsDevice, VfioConfig, VfioDevice, + VhostUserConfig, VsockConfig, }; use anyhow::Result; use async_trait::async_trait; @@ -24,9 +24,8 @@ pub enum DeviceConfig { BlockCfg(BlockConfig), VhostUserBlkCfg(VhostUserConfig), NetworkCfg(NetworkConfig), - ShareFsCfg(ShareFsDeviceConfig), + ShareFsCfg(ShareFsConfig), VfioCfg(VfioConfig), - ShareFsMountCfg(ShareFsMountConfig), VsockCfg(VsockConfig), HybridVsockCfg(HybridVsockConfig), } @@ -38,7 +37,6 @@ pub enum DeviceType { Vfio(VfioDevice), Network(NetworkDevice), ShareFs(ShareFsDevice), - 
ShareFsMount(ShareFsMountDevice), HybridVsock(HybridVsockDevice), } @@ -54,6 +52,8 @@ pub trait Device: std::fmt::Debug + Send + Sync { async fn attach(&mut self, h: &dyn hypervisor) -> Result<()>; // detach is to unplug device from VM async fn detach(&mut self, h: &dyn hypervisor) -> Result>; + // update is to do update for some device + async fn update(&mut self, h: &dyn hypervisor) -> Result<()>; // get_device_info returns device config async fn get_device_info(&self) -> DeviceType; // increase_attach_count is used to increase the attach count for a device diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs index 90c1ae316942..7f86b4d3a8f5 100644 --- a/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs @@ -14,8 +14,8 @@ use dragonball::device_manager::blk_dev_mgr::BlockDeviceType; use super::DragonballInner; use crate::{ - device::DeviceType, HybridVsockConfig, NetworkConfig, ShareFsDeviceConfig, ShareFsMountConfig, - ShareFsMountType, ShareFsOperation, VfioBusMode, VfioDevice, VmmState, JAILER_ROOT, + device::DeviceType, HybridVsockConfig, NetworkConfig, ShareFsConfig, ShareFsMountConfig, + ShareFsMountOperation, ShareFsMountType, VfioBusMode, VfioDevice, VmmState, JAILER_ROOT, }; const MB_TO_B: u32 = 1024 * 1024; @@ -67,9 +67,6 @@ impl DragonballInner { DeviceType::ShareFs(sharefs) => self .add_share_fs_device(&sharefs.config) .context("add share fs device"), - DeviceType::ShareFsMount(sharefs_mount) => self - .add_share_fs_mount(&sharefs_mount.config) - .context("add share fs mount"), } } @@ -101,6 +98,18 @@ impl DragonballInner { } } + pub(crate) async fn update_device(&mut self, device: DeviceType) -> Result<()> { + info!(sl!(), "dragonball update device {:?}", &device); + match device { + DeviceType::ShareFs(sharefs_mount) => { + // It's safe to unwrap mount config as mount_config is 
always there. + self.add_share_fs_mount(&sharefs_mount.config.mount_config.unwrap()) + .context("update share-fs device with mount operation.") + } + _ => Err(anyhow!("unsupported device {:?} to update.", device)), + } + } + fn add_vfio_device(&mut self, device: &VfioDevice) -> Result<()> { let vfio_device = device.clone(); @@ -285,7 +294,7 @@ impl DragonballInner { Ok(()) } - fn add_share_fs_device(&self, config: &ShareFsDeviceConfig) -> Result<()> { + fn add_share_fs_device(&self, config: &ShareFsConfig) -> Result<()> { let mut fs_cfg = FsDeviceConfigInfo { sock_path: config.sock_path.clone(), tag: config.mount_tag.clone(), @@ -337,9 +346,9 @@ impl DragonballInner { fn add_share_fs_mount(&mut self, config: &ShareFsMountConfig) -> Result<()> { let ops = match config.op { - ShareFsOperation::Mount => "mount", - ShareFsOperation::Umount => "umount", - ShareFsOperation::Update => "update", + ShareFsMountOperation::Mount => "mount", + ShareFsMountOperation::Umount => "umount", + ShareFsMountOperation::Update => "update", }; let fstype = match config.fstype { diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs index f4cb798bc478..399f8ca1b3a7 100644 --- a/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs @@ -110,6 +110,11 @@ impl Hypervisor for Dragonball { inner.remove_device(device).await } + async fn update_device(&self, device: DeviceType) -> Result<()> { + let mut inner = self.inner.write().await; + inner.update_device(device).await + } + async fn get_agent_socket(&self) -> Result { let inner = self.inner.read().await; inner.get_agent_socket().await diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/vmm_instance.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/vmm_instance.rs index ec3b7b6fbdba..4311fdf98daa 100644 --- a/src/runtime-rs/crates/hypervisor/src/dragonball/vmm_instance.rs +++ 
b/src/runtime-rs/crates/hypervisor/src/dragonball/vmm_instance.rs @@ -26,7 +26,7 @@ use nix::sched::{setns, CloneFlags}; use seccompiler::BpfProgram; use vmm_sys_util::eventfd::EventFd; -use crate::ShareFsOperation; +use crate::ShareFsMountOperation; pub enum Request { Sync(VmmAction), @@ -238,7 +238,7 @@ impl VmmInstance { Ok(()) } - pub fn patch_fs(&self, cfg: &FsMountConfigInfo, op: ShareFsOperation) -> Result<()> { + pub fn patch_fs(&self, cfg: &FsMountConfigInfo, op: ShareFsMountOperation) -> Result<()> { self.handle_request(Request::Sync(VmmAction::ManipulateFsBackendFs(cfg.clone()))) .with_context(|| { format!( diff --git a/src/runtime-rs/crates/hypervisor/src/lib.rs b/src/runtime-rs/crates/hypervisor/src/lib.rs index deb7c92428fd..737133de50b3 100644 --- a/src/runtime-rs/crates/hypervisor/src/lib.rs +++ b/src/runtime-rs/crates/hypervisor/src/lib.rs @@ -87,6 +87,7 @@ pub trait Hypervisor: std::fmt::Debug + Send + Sync { // device manager async fn add_device(&self, device: DeviceType) -> Result; async fn remove_device(&self, device: DeviceType) -> Result<()>; + async fn update_device(&self, device: DeviceType) -> Result<()>; // utils async fn get_agent_socket(&self) -> Result; diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs b/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs index 80f86a56eb2e..e4a3c4e0b0c8 100644 --- a/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs +++ b/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs @@ -155,4 +155,10 @@ impl QemuInner { info!(sl!(), "QemuInner::remove_device() {} ", device); todo!() } + + pub(crate) async fn update_device(&mut self, device: DeviceType) -> Result<()> { + info!(sl!(), "QemuInner::update_device() {:?}", &device); + + Ok(()) + } } diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs b/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs index d26468632ee1..65b4b6e57d2f 100644 --- a/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs +++ 
b/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs @@ -84,6 +84,11 @@ impl Hypervisor for Qemu { inner.remove_device(device).await } + async fn update_device(&self, device: DeviceType) -> Result<()> { + let mut inner = self.inner.write().await; + inner.update_device(device).await + } + async fn get_agent_socket(&self) -> Result { let inner = self.inner.read().await; inner.get_agent_socket().await diff --git a/src/runtime-rs/crates/resource/src/manager_inner.rs b/src/runtime-rs/crates/resource/src/manager_inner.rs index 53b3ac4a1290..2ee6a5a0545e 100644 --- a/src/runtime-rs/crates/resource/src/manager_inner.rs +++ b/src/runtime-rs/crates/resource/src/manager_inner.rs @@ -102,7 +102,10 @@ impl ResourceManagerInner { { let share_fs = share_fs::new(&self.sid, &c).context("new share fs")?; share_fs - .setup_device_before_start_vm(self.hypervisor.as_ref()) + .setup_device_before_start_vm( + self.hypervisor.as_ref(), + &self.device_manager, + ) .await .context("setup share fs device before start vm")?; @@ -212,7 +215,7 @@ impl ResourceManagerInner { pub async fn setup_after_start_vm(&mut self) -> Result<()> { if let Some(share_fs) = self.share_fs.as_ref() { share_fs - .setup_device_after_start_vm(self.hypervisor.as_ref()) + .setup_device_after_start_vm(self.hypervisor.as_ref(), &self.device_manager) .await .context("setup share fs device after start vm")?; } @@ -227,6 +230,7 @@ impl ResourceManagerInner { .context("handle neighbors")?; self.handle_routes(network).await.context("handle routes")?; } + Ok(()) } diff --git a/src/runtime-rs/crates/resource/src/rootfs/mod.rs b/src/runtime-rs/crates/resource/src/rootfs/mod.rs index 75510ed027ad..8975929c6b9e 100644 --- a/src/runtime-rs/crates/resource/src/rootfs/mod.rs +++ b/src/runtime-rs/crates/resource/src/rootfs/mod.rs @@ -23,6 +23,7 @@ use self::{block_rootfs::is_block_rootfs, nydus_rootfs::NYDUS_ROOTFS_TYPE}; const ROOTFS: &str = "rootfs"; const HYBRID_ROOTFS_LOWER_DIR: &str = "rootfs_lower"; const TYPE_OVERLAY_FS: &str = 
"overlay"; + #[async_trait] pub trait Rootfs: Send + Sync { async fn get_guest_rootfs_path(&self) -> Result; @@ -102,9 +103,16 @@ impl RootFsResource { // handle nydus rootfs let share_rootfs: Arc = if layer.fs_type == NYDUS_ROOTFS_TYPE { Arc::new( - nydus_rootfs::NydusRootfs::new(share_fs, h, sid, cid, layer) - .await - .context("new nydus rootfs")?, + nydus_rootfs::NydusRootfs::new( + device_manager, + share_fs, + h, + sid, + cid, + layer, + ) + .await + .context("new nydus rootfs")?, ) } // handle sharefs rootfs diff --git a/src/runtime-rs/crates/resource/src/rootfs/nydus_rootfs.rs b/src/runtime-rs/crates/resource/src/rootfs/nydus_rootfs.rs index e6419bf7ec2f..a6c40489a8ec 100644 --- a/src/runtime-rs/crates/resource/src/rootfs/nydus_rootfs.rs +++ b/src/runtime-rs/crates/resource/src/rootfs/nydus_rootfs.rs @@ -39,6 +39,7 @@ pub(crate) struct NydusRootfs { impl NydusRootfs { pub async fn new( + d: &RwLock, share_fs: &Arc, h: &dyn Hypervisor, sid: &str, @@ -61,7 +62,8 @@ impl NydusRootfs { // rafs mount the metadata of nydus rootfs let rafs_mnt = do_get_guest_share_path(HYBRID_ROOTFS_LOWER_DIR, cid, true); rafs_mount( - h, + d, + sid, rafs_meta.to_string(), rafs_mnt, extra_options.config.clone(), diff --git a/src/runtime-rs/crates/resource/src/share_fs/mod.rs b/src/runtime-rs/crates/resource/src/share_fs/mod.rs index 4d70a6c7b409..81d35b09d574 100644 --- a/src/runtime-rs/crates/resource/src/share_fs/mod.rs +++ b/src/runtime-rs/crates/resource/src/share_fs/mod.rs @@ -22,12 +22,15 @@ pub mod sandbox_bind_mounts; use std::{collections::HashMap, fmt::Debug, path::PathBuf, sync::Arc}; -use agent::Storage; use anyhow::{anyhow, Context, Ok, Result}; use async_trait::async_trait; -use hypervisor::Hypervisor; +use tokio::sync::RwLock; + +use agent::Storage; use kata_types::config::hypervisor::SharedFsInfo; +use hypervisor::{device::device_manager::DeviceManager, Hypervisor}; + const VIRTIO_FS: &str = "virtio-fs"; const _VIRTIO_FS_NYDUS: &str = "virtio-fs-nydus"; const 
INLINE_VIRTIO_FS: &str = "inline-virtio-fs"; @@ -45,8 +48,16 @@ const RAFS_DIR: &str = "rafs"; #[async_trait] pub trait ShareFs: Send + Sync { fn get_share_fs_mount(&self) -> Arc; - async fn setup_device_before_start_vm(&self, h: &dyn Hypervisor) -> Result<()>; - async fn setup_device_after_start_vm(&self, h: &dyn Hypervisor) -> Result<()>; + async fn setup_device_before_start_vm( + &self, + h: &dyn Hypervisor, + d: &RwLock, + ) -> Result<()>; + async fn setup_device_after_start_vm( + &self, + h: &dyn Hypervisor, + d: &RwLock, + ) -> Result<()>; async fn get_storages(&self) -> Result>; fn mounted_info_set(&self) -> Arc>>; } diff --git a/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs.rs b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs.rs index c0449fa3d0a7..96b2676e97e1 100644 --- a/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs.rs +++ b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs.rs @@ -7,18 +7,18 @@ use std::path::Path; use anyhow::{Context, Result}; +use nix::mount::MsFlags; +use tokio::sync::RwLock; + use hypervisor::{ device::{ - driver::{ - ShareFsDevice, ShareFsMountConfig, ShareFsMountDevice, ShareFsMountType, - ShareFsOperation, - }, - DeviceType, + device_manager::{do_handle_device, do_update_device, DeviceManager}, + driver::{ShareFsMountConfig, ShareFsMountOperation, ShareFsMountType}, + DeviceConfig, }, - Hypervisor, ShareFsDeviceConfig, + ShareFsConfig, }; use kata_sys_util::mount; -use nix::mount::MsFlags; use super::{utils, PASSTHROUGH_FS_DIR}; @@ -35,7 +35,7 @@ pub(crate) fn generate_sock_path(root: &str) -> String { } pub(crate) async fn prepare_virtiofs( - h: &dyn Hypervisor, + d: &RwLock, fs_type: &str, id: &str, root: &str, @@ -49,24 +49,26 @@ pub(crate) async fn prepare_virtiofs( mount::bind_mount_unchecked(&host_rw_dest, &host_ro_dest, true, MsFlags::MS_SLAVE) .context("bind mount shared_fs directory")?; - let share_fs_device = ShareFsDevice { - config: ShareFsDeviceConfig { - sock_path: 
generate_sock_path(root), - mount_tag: String::from(MOUNT_GUEST_TAG), - host_path: String::from(host_ro_dest.to_str().unwrap()), - fs_type: fs_type.to_string(), - queue_size: 0, - queue_num: 0, - options: vec![], - }, + let sharefs_config = ShareFsConfig { + host_shared_path: host_ro_dest.display().to_string(), + sock_path: generate_sock_path(root), + mount_tag: String::from(MOUNT_GUEST_TAG), + fs_type: fs_type.to_string(), + queue_size: 0, + queue_num: 0, + options: vec![], + mount_config: None, }; - h.add_device(DeviceType::ShareFs(share_fs_device)) + + // create and insert virtio-fs device into Guest + do_handle_device(d, &DeviceConfig::ShareFsCfg(sharefs_config)) .await - .context("add device")?; + .context("do add virtio-fs device failed.")?; + Ok(()) } -pub(crate) async fn setup_inline_virtiofs(id: &str, h: &dyn Hypervisor) -> Result<()> { +pub(crate) async fn setup_inline_virtiofs(d: &RwLock, id: &str) -> Result<()> { // - source is the absolute path of PASSTHROUGH_FS_DIR on host, e.g. 
// /run/kata-containers/shared/sandboxes//passthrough // - mount point is the path relative to KATA_GUEST_SHARE_DIR in guest @@ -75,34 +77,39 @@ pub(crate) async fn setup_inline_virtiofs(id: &str, h: &dyn Hypervisor) -> Resul let rw_source = utils::get_host_rw_shared_path(id).join(PASSTHROUGH_FS_DIR); utils::ensure_dir_exist(&rw_source).context("ensure directory exist")?; - let ro_source = utils::get_host_ro_shared_path(id).join(PASSTHROUGH_FS_DIR); - let source = String::from(ro_source.to_str().unwrap()); - - let virtio_fs = ShareFsMountDevice { - config: ShareFsMountConfig { - source: source.clone(), - fstype: ShareFsMountType::PASSTHROUGH, - mount_point: mnt, - config: None, - tag: String::from(MOUNT_GUEST_TAG), - op: ShareFsOperation::Mount, - prefetch_list_path: None, - }, + let host_ro_shared_path = utils::get_host_ro_shared_path(id); + let source = host_ro_shared_path + .join(PASSTHROUGH_FS_DIR) + .display() + .to_string(); + + let virtiofs_mount = ShareFsMountConfig { + source: source.clone(), + fstype: ShareFsMountType::PASSTHROUGH, + mount_point: mnt, + config: None, + tag: String::from(MOUNT_GUEST_TAG), + op: ShareFsMountOperation::Mount, + prefetch_list_path: None, + }; + + let sharefs_config = ShareFsConfig { + host_shared_path: host_ro_shared_path.display().to_string(), + mount_config: Some(virtiofs_mount), + ..Default::default() }; - let result = h - .add_device(DeviceType::ShareFsMount(virtio_fs)) + // update virtio-fs device with ShareFsMountConfig + do_update_device(d, &DeviceConfig::ShareFsCfg(sharefs_config)) .await - .with_context(|| format!("fail to attach passthrough fs {:?}", source)); + .context("fail to attach passthrough fs.")?; - match result { - Ok(_) => Ok(()), - Err(e) => Err(e), - } + Ok(()) } pub async fn rafs_mount( - h: &dyn Hypervisor, + d: &RwLock, + sid: &str, rafs_meta: String, rafs_mnt: String, config_content: String, @@ -112,19 +119,28 @@ pub async fn rafs_mount( sl!(), "Attaching rafs meta file {} to virtio-fs device, rafs 
mount point {}", rafs_meta, rafs_mnt ); - let virtio_fs = ShareFsMountDevice { - config: ShareFsMountConfig { - source: rafs_meta.clone(), - fstype: ShareFsMountType::RAFS, - mount_point: rafs_mnt, - config: Some(config_content), - tag: String::from(MOUNT_GUEST_TAG), - op: ShareFsOperation::Mount, - prefetch_list_path, - }, + + let rafs_config = ShareFsMountConfig { + source: rafs_meta.clone(), + fstype: ShareFsMountType::RAFS, + mount_point: rafs_mnt, + config: Some(config_content), + tag: String::from(MOUNT_GUEST_TAG), + op: ShareFsMountOperation::Mount, + prefetch_list_path, + }; + + let host_shared_path = utils::get_host_ro_shared_path(sid).display().to_string(); + let sharefs_config = ShareFsConfig { + host_shared_path, + mount_config: Some(rafs_config), + ..Default::default() }; - h.add_device(DeviceType::ShareFsMount(virtio_fs)) + + // update virtio-fs device with ShareFsMountConfig + do_update_device(d, &DeviceConfig::ShareFsCfg(sharefs_config)) .await .with_context(|| format!("fail to attach rafs {:?}", rafs_meta))?; + Ok(()) } diff --git a/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_inline.rs b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_inline.rs index 5dddefbfdd7c..fdac7c57bc86 100644 --- a/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_inline.rs +++ b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_inline.rs @@ -6,12 +6,13 @@ use std::collections::HashMap; -use agent::Storage; use anyhow::{Context, Result}; use async_trait::async_trait; -use hypervisor::Hypervisor; +use tokio::sync::{Mutex, RwLock}; + +use agent::Storage; +use hypervisor::{device::device_manager::DeviceManager, Hypervisor}; use kata_types::config::hypervisor::SharedFsInfo; -use tokio::sync::Mutex; use super::{ share_virtio_fs::{ @@ -52,19 +53,30 @@ impl ShareFs for ShareVirtioFsInline { self.share_fs_mount.clone() } - async fn setup_device_before_start_vm(&self, h: &dyn Hypervisor) -> Result<()> { - prepare_virtiofs(h, 
INLINE_VIRTIO_FS, &self.config.id, "") + async fn setup_device_before_start_vm( + &self, + _h: &dyn Hypervisor, + d: &RwLock, + ) -> Result<()> { + prepare_virtiofs(d, INLINE_VIRTIO_FS, &self.config.id, "") .await .context("prepare virtiofs")?; + Ok(()) } - async fn setup_device_after_start_vm(&self, h: &dyn Hypervisor) -> Result<()> { - setup_inline_virtiofs(&self.config.id, h) + async fn setup_device_after_start_vm( + &self, + _h: &dyn Hypervisor, + d: &RwLock, + ) -> Result<()> { + setup_inline_virtiofs(d, &self.config.id) .await .context("setup inline virtiofs")?; + Ok(()) } + async fn get_storages(&self) -> Result> { // setup storage let mut storages: Vec = Vec::new(); diff --git a/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_standalone.rs b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_standalone.rs index db421ada36ae..d281403a7946 100644 --- a/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_standalone.rs +++ b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_standalone.rs @@ -6,15 +6,8 @@ use std::{collections::HashMap, process::Stdio, sync::Arc}; -use crate::share_fs::share_virtio_fs::{ - prepare_virtiofs, FS_TYPE_VIRTIO_FS, KATA_VIRTIO_FS_DEV_TYPE, MOUNT_GUEST_TAG, -}; -use crate::share_fs::{KATA_GUEST_SHARE_DIR, VIRTIO_FS}; -use agent::Storage; use anyhow::{anyhow, Context, Result}; use async_trait::async_trait; -use hypervisor::Hypervisor; -use kata_types::config::hypervisor::SharedFsInfo; use tokio::{ io::{AsyncBufReadExt, BufReader}, process::{Child, Command}, @@ -24,10 +17,20 @@ use tokio::{ }, }; +use agent::Storage; +use hypervisor::{device::device_manager::DeviceManager, Hypervisor}; +use kata_types::config::hypervisor::SharedFsInfo; + use super::{ share_virtio_fs::generate_sock_path, utils::ensure_dir_exist, utils::get_host_ro_shared_path, virtio_fs_share_mount::VirtiofsShareMount, MountedInfo, ShareFs, ShareFsMount, }; +use crate::share_fs::{ + share_virtio_fs::{ + prepare_virtiofs, 
FS_TYPE_VIRTIO_FS, KATA_VIRTIO_FS_DEV_TYPE, MOUNT_GUEST_TAG, + }, + KATA_GUEST_SHARE_DIR, VIRTIO_FS, +}; #[derive(Debug, Clone)] pub struct ShareVirtioFsStandaloneConfig { @@ -172,15 +175,24 @@ impl ShareFs for ShareVirtioFsStandalone { self.share_fs_mount.clone() } - async fn setup_device_before_start_vm(&self, h: &dyn Hypervisor) -> Result<()> { - prepare_virtiofs(h, VIRTIO_FS, &self.config.id, &h.get_jailer_root().await?) + async fn setup_device_before_start_vm( + &self, + h: &dyn Hypervisor, + d: &RwLock, + ) -> Result<()> { + prepare_virtiofs(d, VIRTIO_FS, &self.config.id, &h.get_jailer_root().await?) .await .context("prepare virtiofs")?; self.setup_virtiofsd(h).await.context("setup virtiofsd")?; + Ok(()) } - async fn setup_device_after_start_vm(&self, _h: &dyn Hypervisor) -> Result<()> { + async fn setup_device_after_start_vm( + &self, + _h: &dyn Hypervisor, + _d: &RwLock, + ) -> Result<()> { Ok(()) } diff --git a/src/runtime-rs/crates/resource/src/volume/block_volume.rs b/src/runtime-rs/crates/resource/src/volume/block_volume.rs index d0e361b24326..fc79183d1ef4 100644 --- a/src/runtime-rs/crates/resource/src/volume/block_volume.rs +++ b/src/runtime-rs/crates/resource/src/volume/block_volume.rs @@ -35,7 +35,6 @@ impl BlockVolume { d: &RwLock, m: &oci::Mount, read_only: bool, - cid: &str, sid: &str, ) -> Result { let mnt_src: &str = &m.source; @@ -97,23 +96,16 @@ impl BlockVolume { .await .context("do handle device failed.")?; - // generate host guest shared path - let guest_path = generate_shared_path(m.destination.clone(), read_only, cid, sid) - .await - .context("generate host-guest shared path failed")?; - // storage let mut storage = agent::Storage { - mount_point: guest_path.clone(), + options: if read_only { + vec!["ro".to_string()] + } else { + Vec::new() + }, ..Default::default() }; - storage.options = if read_only { - vec!["ro".to_string()] - } else { - Vec::new() - }; - // As the true Block Device wrapped in DeviceType, we need to // get it out from 
the wrapper, and the device_id will be for // BlockVolume. @@ -127,6 +119,12 @@ impl BlockVolume { device_id = device.device_id; } + // generate host guest shared path + let guest_path = generate_shared_path(m.destination.clone(), read_only, &device_id, sid) + .await + .context("generate host-guest shared path failed")?; + storage.mount_point = guest_path.clone(); + // In some case, dest is device /dev/xxx if m.destination.clone().starts_with("/dev") { storage.fs_type = "bind".to_string(); diff --git a/src/runtime-rs/crates/resource/src/volume/mod.rs b/src/runtime-rs/crates/resource/src/volume/mod.rs index 17cf42a1ec95..490181a1dfcd 100644 --- a/src/runtime-rs/crates/resource/src/volume/mod.rs +++ b/src/runtime-rs/crates/resource/src/volume/mod.rs @@ -77,7 +77,7 @@ impl VolumeResource { } else if is_block_volume(m).context("block volume type")? { // handle block volume Arc::new( - block_volume::BlockVolume::new(d, m, read_only, cid, sid) + block_volume::BlockVolume::new(d, m, read_only, sid) .await .with_context(|| format!("new share fs volume {:?}", m))?, ) diff --git a/src/runtime-rs/crates/resource/src/volume/utils.rs b/src/runtime-rs/crates/resource/src/volume/utils.rs index 2121b02c2c43..d5f17d44b4d5 100644 --- a/src/runtime-rs/crates/resource/src/volume/utils.rs +++ b/src/runtime-rs/crates/resource/src/volume/utils.rs @@ -57,13 +57,13 @@ pub fn get_file_name>(src: P) -> Result { pub(crate) async fn generate_shared_path( dest: String, read_only: bool, - cid: &str, + device_id: &str, sid: &str, ) -> Result { let file_name = get_file_name(&dest).context("failed to get file name.")?; - let mount_name = generate_mount_path(cid, file_name.as_str()); - let guest_path = do_get_guest_path(&mount_name, cid, true, false); - let host_path = do_get_host_path(&mount_name, sid, cid, true, read_only); + let mount_name = generate_mount_path(device_id, file_name.as_str()); + let guest_path = do_get_guest_path(&mount_name, device_id, true, false); + let host_path = 
do_get_host_path(&mount_name, sid, device_id, true, read_only); if dest.starts_with("/dev") { fs::File::create(&host_path).context(format!("failed to create file {:?}", &host_path))?; diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 35a9ae5e19dd..c9cf7ac58485 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -80,6 +80,7 @@ QEMUBINDIR := $(PREFIXDEPS)/bin CLHBINDIR := $(PREFIXDEPS)/bin FCBINDIR := $(PREFIXDEPS)/bin ACRNBINDIR := $(PREFIXDEPS)/bin +STRATOVIRTBINDIR := $(PREFIXDEPS)/bin SYSCONFDIR := /etc LOCALSTATEDIR := /var @@ -103,6 +104,8 @@ GENERATED_VARS = \ CONFIG_QEMU_SNP_IN \ CONFIG_CLH_IN \ CONFIG_FC_IN \ + CONFIG_STRATOVIRT_IN \ + CONFIG_REMOTE_IN \ $(USER_VARS) SCRIPTS += $(COLLECT_SCRIPT) SCRIPTS_DIR := $(BINDIR) @@ -146,12 +149,14 @@ HYPERVISOR_ACRN = acrn HYPERVISOR_FC = firecracker HYPERVISOR_QEMU = qemu HYPERVISOR_CLH = cloud-hypervisor +HYPERVISOR_STRATOVIRT = stratovirt +HYPERVISOR_REMOTE = remote # Determines which hypervisor is specified in $(CONFIG_FILE). DEFAULT_HYPERVISOR ?= $(HYPERVISOR_QEMU) # List of hypervisors this build system can generate configuration for. 
-HYPERVISORS := $(HYPERVISOR_ACRN) $(HYPERVISOR_FC) $(HYPERVISOR_QEMU) $(HYPERVISOR_CLH) +HYPERVISORS := $(HYPERVISOR_ACRN) $(HYPERVISOR_FC) $(HYPERVISOR_QEMU) $(HYPERVISOR_CLH) $(HYPERVISOR_STRATOVIRT) $(HYPERVISOR_REMOTE) QEMUPATH := $(QEMUBINDIR)/$(QEMUCMD) QEMUVALIDHYPERVISORPATHS := [\"$(QEMUPATH)\"] @@ -177,6 +182,9 @@ ACRNVALIDHYPERVISORPATHS := [\"$(ACRNPATH)\"] ACRNCTLPATH := $(ACRNBINDIR)/$(ACRNCTLCMD) ACRNVALIDCTLPATHS := [\"$(ACRNCTLPATH)\"] +STRATOVIRTPATH = $(STRATOVIRTBINDIR)/$(STRATOVIRTCMD) +STRATOVIRTVALIDHYPERVISORPATHS := [\"$(STRATOVIRTPATH)\"] + # Default number of vCPUs DEFVCPUS := 1 # Default maximum number of vCPUs @@ -219,6 +227,7 @@ DEFVALIDENTROPYSOURCES := [\"/dev/urandom\",\"/dev/random\",\"\"] DEFDISABLEBLOCK := false DEFSHAREDFS_CLH_VIRTIOFS := virtio-fs DEFSHAREDFS_QEMU_VIRTIOFS := virtio-fs +DEFSHAREDFS_STRATOVIRT_VIRTIOFS := virtio-fs DEFSHAREDFS_QEMU_TDX_VIRTIOFS := virtio-9p DEFSHAREDFS_QEMU_SEV_VIRTIOFS := virtio-9p DEFSHAREDFS_QEMU_SNP_VIRTIOFS := virtio-9p @@ -335,6 +344,18 @@ ifneq (,$(QEMUCMD)) CONFIGS += $(CONFIG_QEMU_NVIDIA_GPU) + CONFIG_FILE_REMOTE = configuration-remote.toml + CONFIG_REMOTE = config/$(CONFIG_FILE_REMOTE) + CONFIG_REMOTE_IN = $(CONFIG_REMOTE).in + + CONFIG_PATH_REMOTE = $(abspath $(CONFDIR)/$(CONFIG_FILE_REMOTE)) + CONFIG_PATHS += $(CONFIG_PATH_REMOTE) + + SYSCONFIG_REMOTE = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_REMOTE)) + SYSCONFIG_PATHS += $(SYSCONFIG_REMOTE) + + CONFIGS += $(CONFIG_REMOTE) + # qemu-specific options (all should be suffixed by "_QEMU") DEFBLOCKSTORAGEDRIVER_QEMU := virtio-scsi DEFBLOCKDEVICEAIO_QEMU := io_uring @@ -381,6 +402,36 @@ ifneq (,$(CLHCMD)) KERNELPATH_CLH = $(KERNELDIR)/$(KERNEL_NAME_CLH) endif +ifneq (,$(STRATOVIRTCMD)) + KNOWN_HYPERVISORS += $(HYPERVISOR_STRATOVIRT) + + CONFIG_FILE_STRATOVIRT = configuration-stratovirt.toml + CONFIG_STRATOVIRT = config/$(CONFIG_FILE_STRATOVIRT) + CONFIG_STRATOVIRT_IN = $(CONFIG_STRATOVIRT).in + + CONFIG_PATH_STRATOVIRT = $(abspath 
$(CONFDIR)/$(CONFIG_FILE_STRATOVIRT)) + CONFIG_PATHS += $(CONFIG_PATH_STRATOVIRT) + + SYSCONFIG_STRATOVIRT = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_STRATOVIRT)) + SYSCONFIG_PATHS += $(SYSCONFIG_STRATOVIRT) + + CONFIGS += $(CONFIG_STRATOVIRT) + + # stratovirt-specific options (all should be suffixed by "_STRATOVIRT") + DEFMACHINETYPE_STRATOVIRT := microvm + DEFBLOCKSTORAGEDRIVER_STRATOVIRT := virtio-mmio + DEFNETWORKMODEL_STRATOVIRT := tcfilter + DEFSTATICRESOURCEMGMT_STRATOVIRT = true +ifeq ($(ARCH),amd64) + KERNELTYPE_STRATOVIRT = compressed +endif +ifeq ($(ARCH),arm64) + KERNELTYPE_STRATOVIRT = uncompressed +endif + KERNEL_NAME_STRATOVIRT = $(call MAKE_KERNEL_NAME,$(KERNELTYPE_STRATOVIRT)) + KERNELPATH_STRATOVIRT = $(KERNELDIR)/$(KERNEL_NAME_STRATOVIRT) +endif + ifneq (,$(FCCMD)) KNOWN_HYPERVISORS += $(HYPERVISOR_FC) @@ -479,8 +530,10 @@ USER_VARS += BINDIR USER_VARS += CONFIG_ACRN_IN USER_VARS += CONFIG_CLH_IN USER_VARS += CONFIG_FC_IN +USER_VARS += CONFIG_STRATOVIRT_IN USER_VARS += CONFIG_PATH USER_VARS += CONFIG_QEMU_IN +USER_VARS += CONFIG_REMOTE_IN USER_VARS += DESTDIR USER_VARS += DEFAULT_HYPERVISOR USER_VARS += ACRNCMD @@ -497,6 +550,8 @@ USER_VARS += FCPATH USER_VARS += FCVALIDHYPERVISORPATHS USER_VARS += FCJAILERPATH USER_VARS += FCVALIDJAILERPATHS +USER_VARS += STRATOVIRTPATH +USER_VARS += STRATOVIRTVALIDHYPERVISORPATHS USER_VARS += SYSCONFIG USER_VARS += IMAGENAME USER_VARS += IMAGETDXNAME @@ -520,6 +575,7 @@ USER_VARS += KERNELTDXPATH USER_VARS += KERNELSNPPATH USER_VARS += KERNELPATH_CLH USER_VARS += KERNELPATH_FC +USER_VARS += KERNELPATH_STRATOVIRT USER_VARS += KERNELVIRTIOFSPATH USER_VARS += FIRMWAREPATH USER_VARS += FIRMWARESEVPATH @@ -531,6 +587,7 @@ USER_VARS += MACHINEACCELERATORS USER_VARS += CPUFEATURES USER_VARS += TDXCPUFEATURES USER_VARS += DEFMACHINETYPE_CLH +USER_VARS += DEFMACHINETYPE_STRATOVIRT USER_VARS += KERNELPARAMS USER_VARS += KERNELTDXPARAMS USER_VARS += LIBEXECDIR @@ -572,6 +629,7 @@ USER_VARS += DEFNETWORKMODEL_ACRN USER_VARS 
+= DEFNETWORKMODEL_CLH USER_VARS += DEFNETWORKMODEL_FC USER_VARS += DEFNETWORKMODEL_QEMU +USER_VARS += DEFNETWORKMODEL_STRATOVIRT USER_VARS += DEFDISABLEGUESTEMPTYDIR USER_VARS += DEFDISABLEGUESTSECCOMP USER_VARS += DEFDISABLESELINUX @@ -582,9 +640,11 @@ USER_VARS += DEFDISABLEBLOCK USER_VARS += DEFBLOCKSTORAGEDRIVER_ACRN USER_VARS += DEFBLOCKSTORAGEDRIVER_FC USER_VARS += DEFBLOCKSTORAGEDRIVER_QEMU +USER_VARS += DEFBLOCKSTORAGEDRIVER_STRATOVIRT USER_VARS += DEFBLOCKDEVICEAIO_QEMU USER_VARS += DEFSHAREDFS_CLH_VIRTIOFS USER_VARS += DEFSHAREDFS_QEMU_VIRTIOFS +USER_VARS += DEFSHAREDFS_STRATOVIRT_VIRTIOFS USER_VARS += DEFSHAREDFS_QEMU_TDX_VIRTIOFS USER_VARS += DEFSHAREDFS_QEMU_SEV_VIRTIOFS USER_VARS += DEFSHAREDFS_QEMU_SNP_VIRTIOFS @@ -609,6 +669,7 @@ USER_VARS += DEFSANDBOXCGROUPONLY USER_VARS += DEFSTATICRESOURCEMGMT USER_VARS += DEFSTATICRESOURCEMGMT_CLH USER_VARS += DEFSTATICRESOURCEMGMT_FC +USER_VARS += DEFSTATICRESOURCEMGMT_STRATOVIRT USER_VARS += DEFSTATICRESOURCEMGMT_TEE USER_VARS += DEFBINDMOUNTS USER_VARS += DEFSERVICEOFFLOAD @@ -940,6 +1001,9 @@ ifneq (,$(findstring $(HYPERVISOR_FC),$(KNOWN_HYPERVISORS))) endif ifneq (,$(findstring $(HYPERVISOR_ACRN),$(KNOWN_HYPERVISORS))) @printf "\t$(HYPERVISOR_ACRN) hypervisor path (ACRNPATH) : %s\n" $(abspath $(ACRNPATH)) +endif +ifneq (,$(findstring $(HYPERVISOR_STRATOVIRT),$(KNOWN_HYPERVISORS))) + @printf "\t$(HYPERVISOR_STRATOVIRT) hypervisor path (STRATOVIRTPATH) : %s\n" $(abspath $(STRATOVIRTPATH)) endif @printf "\tassets path (PKGDATADIR) : %s\n" $(abspath $(PKGDATADIR)) @printf "\tshim path (PKGLIBEXECDIR) : %s\n" $(abspath $(PKGLIBEXECDIR)) diff --git a/src/runtime/arch/amd64-options.mk b/src/runtime/arch/amd64-options.mk index e6068158cf4e..940e87a41a17 100644 --- a/src/runtime/arch/amd64-options.mk +++ b/src/runtime/arch/amd64-options.mk @@ -28,3 +28,6 @@ ACRNCTLCMD := acrnctl CLHCMD := cloud-hypervisor DEFSTATICRESOURCEMGMT_CLH := false + +# stratovirt binary name +STRATOVIRTCMD := stratovirt diff --git 
a/src/runtime/arch/arm64-options.mk b/src/runtime/arch/arm64-options.mk index 7f74ae311168..895c93f82e9a 100644 --- a/src/runtime/arch/arm64-options.mk +++ b/src/runtime/arch/arm64-options.mk @@ -21,3 +21,6 @@ FCJAILERCMD := jailer CLHCMD := cloud-hypervisor DEFSTATICRESOURCEMGMT_CLH := true + +# stratovirt binary name +STRATOVIRTCMD := stratovirt diff --git a/src/runtime/cmd/kata-runtime/kata-check_amd64.go b/src/runtime/cmd/kata-runtime/kata-check_amd64.go index fcdb047fbef1..e2c2c26df284 100644 --- a/src/runtime/cmd/kata-runtime/kata-check_amd64.go +++ b/src/runtime/cmd/kata-runtime/kata-check_amd64.go @@ -115,6 +115,8 @@ func setCPUtype(hypervisorType vc.HypervisorType) error { } switch hypervisorType { + case vc.StratovirtHypervisor: + fallthrough case vc.FirecrackerHypervisor: fallthrough case vc.ClhHypervisor: @@ -315,10 +317,14 @@ func archHostCanCreateVMContainer(hypervisorType vc.HypervisorType) error { fallthrough case vc.ClhHypervisor: fallthrough + case vc.StratovirtHypervisor: + fallthrough case vc.FirecrackerHypervisor: return kvmIsUsable() case vc.AcrnHypervisor: return acrnIsUsable() + case vc.RemoteHypervisor: + return nil case vc.MockHypervisor: return nil default: diff --git a/src/runtime/cmd/kata-runtime/kata-check_arm64.go b/src/runtime/cmd/kata-runtime/kata-check_arm64.go index 66d81c71c3d1..933c9776095a 100644 --- a/src/runtime/cmd/kata-runtime/kata-check_arm64.go +++ b/src/runtime/cmd/kata-runtime/kata-check_arm64.go @@ -86,6 +86,9 @@ func checkKVMExtensions() error { } func archHostCanCreateVMContainer(hypervisorType vc.HypervisorType) error { + if hypervisorType == "remote" { + return nil + } if err := kvmIsUsable(); err != nil { return err } diff --git a/src/runtime/cmd/kata-runtime/kata-check_ppc64le.go b/src/runtime/cmd/kata-runtime/kata-check_ppc64le.go index 7c5e7453ac63..de34f9614f74 100644 --- a/src/runtime/cmd/kata-runtime/kata-check_ppc64le.go +++ b/src/runtime/cmd/kata-runtime/kata-check_ppc64le.go @@ -61,6 +61,9 @@ func 
setCPUtype(hypervisorType vc.HypervisorType) error { } func archHostCanCreateVMContainer(hypervisorType vc.HypervisorType) error { + if hypervisorType == "remote" { + return nil + } return kvmIsUsable() } diff --git a/src/runtime/cmd/kata-runtime/kata-check_s390x.go b/src/runtime/cmd/kata-runtime/kata-check_s390x.go index c9b1578b43f7..8ee51b7e5c32 100644 --- a/src/runtime/cmd/kata-runtime/kata-check_s390x.go +++ b/src/runtime/cmd/kata-runtime/kata-check_s390x.go @@ -55,6 +55,9 @@ func kvmIsUsable() error { } func archHostCanCreateVMContainer(hypervisorType vc.HypervisorType) error { + if hypervisorType == "remote" { + return nil + } return kvmIsUsable() } diff --git a/src/runtime/config/configuration-remote.toml.in b/src/runtime/config/configuration-remote.toml.in new file mode 100644 index 000000000000..4c75af275ec1 --- /dev/null +++ b/src/runtime/config/configuration-remote.toml.in @@ -0,0 +1,318 @@ +# Copyright (c) 2017-2019 Intel Corporation +# Copyright (c) 2023 IBM Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +# XXX: WARNING: this file is auto-generated. +# XXX: +# XXX: Source file: "@CONFIG_REMOTE_IN@" +# XXX: Project: +# XXX: Name: @PROJECT_NAME@ +# XXX: Type: @PROJECT_TYPE@ + + +[hypervisor.remote] +remote_hypervisor_socket = "/run/peerpod/hypervisor.sock" +remote_hypervisor_timeout = 600 + + +# Enable confidential guest support. +# Toggling that setting may trigger different hardware features, ranging +# from memory encryption to both memory and CPU-state encryption and integrity. +# The Kata Containers runtime dynamically detects the available feature set and +# aims at enabling the largest possible one, returning an error if none is +# available, or none is supported by the hypervisor. 
+# +# Known limitations: +# * Does not work by design: +# - CPU Hotplug +# - Memory Hotplug +# - NVDIMM devices +# +# Default false +# confidential_guest = true + + +# List of valid annotation names for the hypervisor +# Each member of the list is a regular expression, which is the base name +# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" +# Note: Remote hypervisor is only handling the following annotations +enable_annotations = ["machine_type", "default_memory", "default_vcpus"] + +# Optional space-separated list of options to pass to the guest kernel. +# For example, use `kernel_params = "vsyscall=emulate"` if you are having +# trouble running pre-2.15 glibc. +# +# WARNING: - any parameter specified here will take priority over the default +# parameter value of the same name used to start the virtual machine. +# Do not set values here unless you understand the impact of doing so as you +# may stop the virtual machine from booting. +# To see the list of default parameters, enable hypervisor debug, create a +# container and look for 'default-kernel-parameters' log entries. +# NOTE: kernel_params are not currently passed over in remote hypervisor +# kernel_params = "" + +# Path to the firmware. 
+# If you want that qemu uses the default firmware leave this option empty +firmware = "@FIRMWAREPATH@" + +# Default number of vCPUs per SB/VM: +# unspecified or 0 --> will be set to @DEFVCPUS@ +# < 0 --> will be set to the actual number of physical cores +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores +# default_vcpus = 1 + +# Default maximum number of vCPUs per SB/VM: +# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# WARNING: Depending of the architecture, the maximum number of vCPUs supported by KVM is used when +# the actual number of physical cores is greater than it. +# WARNING: Be aware that this value impacts the virtual machine's memory footprint and CPU +# the hotplug functionality. For example, `default_maxvcpus = 240` specifies that until 240 vCPUs +# can be added to a SB/VM, but the memory footprint will be big. Another example, with +# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of +# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable, +# unless you know what are you doing. +# NOTICE: on arm platform with gicv2 interrupt controller, set it to 8. +# default_maxvcpus = @DEFMAXVCPUS@ + +# Bridges can be used to hot plug devices. +# Limitations: +# * Currently only pci bridges are supported +# * Until 30 devices per bridge can be hot plugged. +# * Until 5 PCI bridges can be cold plugged per VM. 
+# This limitation could be a bug in qemu or in the kernel +# Default number of bridges per SB/VM: +# unspecified or 0 --> will be set to @DEFBRIDGES@ +# > 1 <= 5 --> will be set to the specified number +# > 5 --> will be set to 5 +default_bridges = @DEFBRIDGES@ + +# Default memory size in MiB for SB/VM. +# If unspecified then it will be set to @DEFMEMSZ@ MiB. +# Note: the remote hypervisor uses the peer pod config to determine the memory of the VM +# default_memory = @DEFMEMSZ@ +# +# Default memory slots per SB/VM. +# If unspecified then it will be set to @DEFMEMSLOTS@. +# This will determine the number of times that memory can be hot-added to the sandbox/VM. +# Note: the remote hypervisor uses the peer pod config to determine the memory of the VM +#memory_slots = @DEFMEMSLOTS@ + +# This option changes the default hypervisor and kernel parameters +# to enable debug output where available. And Debug also enable the hmp socket. +# +# Default false +#enable_debug = true + +# Path to OCI hook binaries in the *guest rootfs*. +# This does not affect host-side hooks which must instead be added to +# the OCI spec passed to the runtime. +# +# You can create a rootfs with hooks by customizing the osbuilder scripts: +# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder +# +# Hooks must be stored in a subdirectory of guest_hook_path according to their +# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}". +# The agent will scan these directories for executable files and add them, in +# lexicographical order, to the lifecycle of the guest container. +# Hooks are executed in the runtime namespace of the guest. See the official documentation: +# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks +# Warnings will be logged if any error is encountered while scanning for hooks, +# but it will not abort container execution.
+#guest_hook_path = "/usr/share/oci/hooks" + +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + +# disable applying SELinux on the container process +# If set to false, the type `container_t` is applied to the container process by default. +# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built +# with `SELINUX=yes`. +# (default: true) +# Note: The remote hypervisor has a different guest, so currently requires this to be disabled +disable_guest_selinux = true + +[agent.@PROJECT_TYPE@] +# If enabled, make the agent display debug-level messages. +# (default: disabled) +#enable_debug = true + +# Enable agent tracing. +# +# If enabled, the agent will generate OpenTelemetry trace spans. +# +# Notes: +# +# - If the runtime also has tracing enabled, the agent spans will be +# associated with the appropriate runtime parent span. +# - If enabled, the runtime will wait for the container to shutdown, +# increasing the container shutdown time slightly. +# +# (default: disabled) +#enable_tracing = true + +# Enable debug console. + +# If enabled, user can connect guest OS running inside hypervisor +# through "kata-runtime exec " command + +#debug_console_enabled = true + +# Agent connection dialing timeout value in seconds +# (default: 30) +#dial_timeout = 30 + +[runtime] +# If enabled, the runtime will log additional debug messages to the +# system log +# (default: disabled) +#enable_debug = true +# +# Internetworking model +# Determines how the VM should be connected to the +# the container network interface +# Options: +# +# - macvtap +# Used when the Container network interface can be bridged using +# macvtap. +# +# - none +# Used when customize network. Only creates a tap device. No veth pair. +# +# - tcfilter +# Uses tc filter rules to redirect traffic from the network interface +# provided by plugin to a tap interface connected to the VM. 
+# +# Note: The remote hypervisor uses its own network, so "none" is required +internetworking_model="none" + +# disable guest seccomp +# Determines whether container seccomp profiles are passed to the virtual +# machine and applied by the kata agent. If set to true, seccomp is not applied +# within the guest +# (default: true) +# Note: The remote hypervisor has a different guest, so currently requires this to be set to true +disable_guest_seccomp=true + + +# Apply a custom SELinux security policy to the container process inside the VM. +# This is used when you want to apply a type other than the default `container_t`, +# so general users should not uncomment and apply it. +# (format: "user:role:type") +# Note: You cannot specify MCS policy with the label because the sensitivity levels and +# categories are determined automatically by high-level container runtimes such as containerd. +#guest_selinux_label="@DEFGUESTSELINUXLABEL@" + +# If enabled, the runtime will create opentracing.io traces and spans. +# (See https://www.jaegertracing.io/docs/getting-started). +# (default: disabled) +#enable_tracing = true + +# Set the full url to the Jaeger HTTP Thrift collector. +# The default if not set will be "http://localhost:14268/api/traces" +#jaeger_endpoint = "" + +# Sets the username to be used if basic auth is required for Jaeger. +#jaeger_user = "" + +# Sets the password to be used if basic auth is required for Jaeger. +#jaeger_password = "" + +# If enabled, the runtime will not create a network namespace for shim and hypervisor processes. +# This option may have some potential impacts to your host. It should only be used when you know what you're doing. +# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only +# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge +# (like OVS) directly.
+# (default: false) +# Note: The remote hypervisor has a different networking model, which requires true +disable_new_netns = true + +# if enabled, the runtime will add all the kata processes inside one dedicated cgroup. +# The container cgroups in the host are not created, just one single cgroup per sandbox. +# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox. +# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation. +# The sandbox cgroup is constrained if there is no container type annotation. +# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType +sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@ + +# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In +# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful +# when a hardware architecture or hypervisor solution is utilized which does not support CPU and/or memory hotplug. +# Compatibility for determining appropriate sandbox (VM) size: +# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O +# does not yet support sandbox sizing annotations. +# - When running single containers using a tool like ctr, container sizing information will be available. +# Note: the remote hypervisor uses the peer pod config to determine the sandbox size, so requires this to be set to true +static_sandbox_resource_mgmt=true + +# VFIO Mode +# Determines how VFIO devices should be presented to the container. +# Options: +# +# - vfio +# Matches behaviour of OCI runtimes (e.g. runc) as much as +# possible. VFIO devices will appear in the container as VFIO +# character devices under /dev/vfio.
The exact names may differ +# from the host (they need to match the VM's IOMMU group numbers +# rather than the host's) +# +# - guest-kernel +# This is a Kata-specific behaviour that's useful in certain cases. +# The VFIO device is managed by whatever driver in the VM kernel +# claims it. This means it will appear as one or more device nodes +# or network interfaces depending on the nature of the device. +# Using this mode requires specially built workloads that know how +# to locate the relevant device interfaces within the VM. +# +vfio_mode="@DEFVFIOMODE@" + +# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will +# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest. +# Note: remote hypervisor has no sharing of emptydir mounts from host to guest +disable_guest_empty_dir=false + +# Enabled experimental feature list, format: ["a", "b"]. +# Experimental features are features not stable enough for production, +# they may break compatibility, and are prepared for a big version bump. +# Supported experimental features: +# (default: []) +experimental=@DEFAULTEXPFEATURES@ + +# If enabled, user can run pprof tools with shim v2 process through kata-monitor. +# (default: false) +# enable_pprof = true + +# WARNING: All the options in the following section have not been implemented yet. +# This section was added as a placeholder. DO NOT USE IT! +[image] +# Container image service. +# +# Offload the CRI image management service to the Kata agent. +# (default: false) +# Note: The remote hypervisor offloads the pulling of images to the peer pod VM, so requires this to be true +service_offload = true + +# Container image decryption keys provisioning. +# Applies only if service_offload is true. +# Keys can be provisioned locally (e.g. through a special command or +# a local file) or remotely (usually after the guest is remotely attested).
+# The provision setting is a complete URL that lets the Kata agent decide +# which method to use in order to fetch the keys. +# +# Keys can be stored in a local file, in a measured and attested initrd: +#provision=data:///local/key/file +# +# Keys could be fetched through a special command or binary from the +# initrd (guest) image, e.g. a firmware call: +#provision=file:///path/to/bin/fetcher/in/guest +# +# Keys can be remotely provisioned. The Kata agent fetches them from e.g. +# a HTTPS URL: +#provision=https://my-key-broker.foo/tenant/ diff --git a/src/runtime/config/configuration-stratovirt.toml.in b/src/runtime/config/configuration-stratovirt.toml.in new file mode 100644 index 000000000000..c98cf6bd7828 --- /dev/null +++ b/src/runtime/config/configuration-stratovirt.toml.in @@ -0,0 +1,394 @@ +# Copyright (c) 2023 Huawei Technologies Co.,Ltd. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# XXX: WARNING: this file is auto-generated. +# XXX: +# XXX: Source file: "@CONFIG_STRATOVIRT_IN@" +# XXX: Project: +# XXX: Name: @PROJECT_NAME@ +# XXX: Type: @PROJECT_TYPE@ + +[hypervisor.stratovirt] +path = "@STRATOVIRTPATH@" +kernel = "@KERNELPATH_STRATOVIRT@" +#image = "@IMAGEPATH@" +initrd = "@INITRDPATH@" +machine_type = "@DEFMACHINETYPE_STRATOVIRT@" + +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type = @DEFROOTFSTYPE@ + +# List of valid annotation names for the hypervisor +# Each member of the list is a regular expression, which is the base name +# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" +enable_annotations = @DEFENABLEANNOTATIONS@ + +# List of valid annotations values for the hypervisor +# Each member of the list is a path pattern as described by glob(3). +# The default if not set is empty (all annotations rejected.) 
+# Your distribution recommends: @STRATOVIRTVALIDHYPERVISORPATHS@ +valid_hypervisor_paths = @STRATOVIRTVALIDHYPERVISORPATHS@ + +# Optional space-separated list of options to pass to the guest kernel. +# For example, use `kernel_params = "vsyscall=emulate"` if you are having +# trouble running pre-2.15 glibc. +# +# WARNING: - any parameter specified here will take priority over the default +# parameter value of the same name used to start the virtual machine. +# Do not set values here unless you understand the impact of doing so as you +# may stop the virtual machine from booting. +# To see the list of default parameters, enable hypervisor debug, create a +# container and look for 'default-kernel-parameters' log entries. +kernel_params = "@KERNELPARAMS@" + +# Default number of vCPUs per SB/VM: +# unspecified or 0 --> will be set to @DEFVCPUS@ +# < 0 --> will be set to the actual number of physical cores +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores +default_vcpus = 1 + +# Default maximum number of vCPUs per SB/VM: +# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# WARNING: Depending of the architecture, the maximum number of vCPUs supported by KVM is used when +# the actual number of physical cores is greater than it. +# WARNING: Be aware that this value impacts the virtual machine's memory footprint and CPU +# the hotplug functionality. For example, `default_maxvcpus = 240` specifies that until 240 vCPUs +# can be added to a SB/VM, but the memory footprint will be big. 
Another example, with +# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of +# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable, +# unless you know what you are doing. +# NOTICE: on arm platform with gicv2 interrupt controller, set it to 8. +default_maxvcpus = @DEFMAXVCPUS@ + +# Bridges can be used to hot plug devices. +# Limitations: +# * Currently only pci bridges are supported +# * Up to 30 devices per bridge can be hot plugged. +# * Up to 5 PCI bridges can be cold plugged per VM. +# This limitation could be a bug in the kernel +# Default number of bridges per SB/VM: +# unspecified or 0 --> will be set to @DEFBRIDGES@ +# > 1 <= 5 --> will be set to the specified number +# > 5 --> will be set to 5 +default_bridges = @DEFBRIDGES@ + +# Default memory size in MiB for SB/VM. +# If unspecified then it will be set to @DEFMEMSZ@ MiB. +default_memory = @DEFMEMSZ@ +# +# Default memory slots per SB/VM. +# If unspecified then it will be set to @DEFMEMSLOTS@. +# This determines the number of times that memory can be hot-added to the sandbox/VM. +#memory_slots = @DEFMEMSLOTS@ + +# Default maximum memory in MiB per SB / VM +# unspecified or == 0 --> will be set to the actual amount of physical RAM +# > 0 <= amount of physical RAM --> will be set to the specified number +# > amount of physical RAM --> will be set to the actual amount of physical RAM +default_maxmemory = @DEFMAXMEMSZ@ + +# The size in MiB that will be added to the max memory of the hypervisor. +# It is the memory address space for the NVDIMM device. +# If the block storage driver (block_device_driver) is set to "nvdimm", +# memory_offset should be set to the size of the block device. +# Default 0 +#memory_offset = 0 + +# Disable block device from being used for a container's rootfs.
+# In case of a storage driver like devicemapper where a container's +# root file system is backed by a block device, the block device is passed +# directly to the hypervisor for performance reasons. +# This flag prevents the block device from being passed to the hypervisor, +# virtio-fs is used instead to pass the rootfs. +disable_block_device_use = @DEFDISABLEBLOCK@ + +# Shared file system type: +# - virtio-fs (default) +# - virtio-fs-nydus +# - none +shared_fs = "@DEFSHAREDFS_STRATOVIRT_VIRTIOFS@" + +# Path to vhost-user-fs daemon. +virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@" + +# List of valid annotations values for the virtiofs daemon +# The default if not set is empty (all annotations rejected.) +valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@ + +# Default size of DAX cache in MiB +virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@ + +# Extra args for virtiofsd daemon +# +# Format example: +# ["--arg1=xxx", "--arg2=yyy"] +# Examples: +# Set virtiofsd log level to debug : ["--log-level=debug"] +# +# see `virtiofsd -h` for possible options. +virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@ + +# Cache mode: +# +# - never +# Metadata, data, and pathname lookup are not cached in guest. They are +# always fetched from host and any changes are immediately pushed to host. +# +# - auto +# Metadata and pathname lookup cache expires after a configured amount of +# time (default is 1 second). Data is cached while the file is open (close +# to open consistency). +# +# - always +# Metadata, data, and pathname lookup are cached in guest and never expire. +virtio_fs_cache = "@DEFVIRTIOFSCACHE@" + +# Block storage driver to be used for the hypervisor in case the container +# rootfs is backed by a block device. This is virtio-scsi, virtio-blk +# or nvdimm. +block_device_driver = "@DEFBLOCKSTORAGEDRIVER_STRATOVIRT@" + +# Specifies cache-related options will be set to block devices or not. 
+# Default false +#block_device_cache_set = true + +# Specifies cache-related options for block devices. +# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled. +# Default false +#block_device_cache_direct = true + +# Specifies cache-related options for block devices. +# Denotes whether flush requests for the device are ignored. +# Default false +#block_device_cache_noflush = true + +# Enable huge pages for VM RAM, default false +# Enabling this will result in the VM memory +# being allocated using huge pages. +# This is useful when you want to use vhost-user network +# stacks within the container. This will automatically +# result in memory pre allocation +#enable_hugepages = true + +# Enable vIOMMU, default false +# Enabling this will result in the VM having a vIOMMU device +# This will also add the following options to the kernel's +# command line: intel_iommu=on,iommu=pt +#enable_iommu = true + +# This option changes the default hypervisor and kernel parameters +# to enable debug output where available. +# +# Default false +#enable_debug = true + +# Disable the customizations done in the runtime when it detects +# that it is running on top a VMM. This will result in the runtime +# behaving as it would when running on bare metal. +# +#disable_nesting_checks = true + +# +# Default entropy source. +# The path to a host source of entropy (including a real hardware RNG) +# /dev/urandom and /dev/random are two main options. +# Be aware that /dev/random is a blocking source of entropy. If the host +# runs out of entropy, the VMs boot time will increase leading to get startup +# timeouts. +# The source of entropy /dev/urandom is non-blocking and provides a +# generally acceptable source of entropy. It should work well for pretty much +# all practical purposes. +entropy_source = "@DEFENTROPYSOURCE@" + +# Path to OCI hook binaries in the *guest rootfs*. 
+# This does not affect host-side hooks which must instead be added to +# the OCI spec passed to the runtime. +# +# You can create a rootfs with hooks by customizing the osbuilder scripts: +# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder +# +# Hooks must be stored in a subdirectory of guest_hook_path according to their +# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}". +# The agent will scan these directories for executable files and add them, in +# lexicographical order, to the lifecycle of the guest container. +# Hooks are executed in the runtime namespace of the guest. See the official documentation: +# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks +# Warnings will be logged if any error is encountered while scanning for hooks, +# but it will not abort container execution. +#guest_hook_path = "/usr/share/oci/hooks" + +# disable applying SELinux on the VMM process (default false) +disable_selinux = @DEFDISABLESELINUX@ + +# disable applying SELinux on the container process +# If set to false, the type `container_t` is applied to the container process by default. +# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built +# with `SELINUX=yes`. +# (default: true) +disable_guest_selinux = @DEFDISABLEGUESTSELINUX@ + +[factory] +# VM templating support. Once enabled, new VMs are created from template +# using vm cloning. They will share the same initial kernel, initramfs and +# agent memory by mapping it readonly. It helps speeding up new container +# creation and saves a lot of memory if there are many kata containers running +# on the same host. +# +# When disabled, new VMs are created from scratch. +# +# Note: Requires "initrd=" to be set ("image=" is not supported). +# +# Default false +#enable_template = true + +[agent.@PROJECT_TYPE@] +# If enabled, make the agent display debug-level messages. 
+# (default: disabled) +#enable_debug = true + +# Enable agent tracing. +# +# If enabled, the agent will generate OpenTelemetry trace spans. +# +# Notes: +# +# - If the runtime also has tracing enabled, the agent spans will be +# associated with the appropriate runtime parent span. +# - If enabled, the runtime will wait for the container to shut down, +# increasing the container shutdown time slightly. +# +# (default: disabled) +#enable_tracing = true + +# Comma separated list of kernel modules and their parameters. +# These modules will be loaded in the guest kernel using modprobe(8). +# The following example can be used to load two kernel modules with parameters +# - kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915 enable_ppgtt=0"] +# The first word is considered as the module name and the rest as its parameters. +# Container will not be started when: +# * A kernel module is specified and the modprobe command is not installed in the guest +# or it fails loading the module. +# * The module is not available in the guest or it doesn't meet the guest kernel +# requirements, like architecture and version. +# +kernel_modules = [] + +# Enable debug console. + +# If enabled, user can connect guest OS running inside hypervisor +# through "kata-runtime exec <sandbox-id>" command + +#debug_console_enabled = true + +# Agent connection dialing timeout value in seconds +# (default: 45) +dial_timeout = 45 + +[runtime] +# If enabled, the runtime will log additional debug messages to the +# system log +# (default: disabled) +#enable_debug = true +# +# Internetworking model +# Determines how the VM should be connected to +# the container network interface +# Options: +# +# - macvtap +# Used when the Container network interface can be bridged using +# macvtap. +# +# - none +# Used when customizing the network. Only creates a tap device. No veth pair.
+# +# - tcfilter +# Uses tc filter rules to redirect traffic from the network interface +# provided by plugin to a tap interface connected to the VM. +# +internetworking_model = "@DEFNETWORKMODEL_STRATOVIRT@" + +# disable guest seccomp +# Determines whether container seccomp profiles are passed to the virtual +# machine and applied by the kata agent. If set to true, seccomp is not applied +# within the guest +# (default: true) +disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@ + +# vCPUs pinning settings +# if enabled, each vCPU thread will be scheduled to a fixed CPU +# qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet) +#enable_vcpus_pinning = false + +# Apply a custom SELinux security policy to the container process inside the VM. +# This is used when you want to apply a type other than the default `container_t`, +# so general users should not uncomment and apply it. +# (format: "user:role:type") +# Note: You cannot specify MCS policy with the label because the sensitivity levels and +# categories are determined automatically by high-level container runtimes such as containerd. +#guest_selinux_label = "@DEFGUESTSELINUXLABEL@" + +# If enabled, the runtime will create opentracing.io traces and spans. +# (See https://www.jaegertracing.io/docs/getting-started). +# (default: disabled) +#enable_tracing = true + +# Set the full url to the Jaeger HTTP Thrift collector. +# The default if not set will be "http://localhost:14268/api/traces" +#jaeger_endpoint = "" + +# Sets the username to be used if basic auth is required for Jaeger. +#jaeger_user = "" + +# Sets the password to be used if basic auth is required for Jaeger. +#jaeger_password = "" + +# If enabled, the runtime will not create a network namespace for shim and hypervisor processes. +# This option may have some potential impacts to your host. It should only be used when you know what you're doing. 
+# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only +# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge +# (like OVS) directly. +# (default: false) +#disable_new_netns = true + +# if enabled, the runtime will add all the kata processes inside one dedicated cgroup. +# The container cgroups in the host are not created, just one single cgroup per sandbox. +# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox. +# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation. +# The sandbox cgroup is constrained if there is no container type annotation. +# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType +sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY@ + +# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In +# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful +# when a hardware architecture or hypervisor solutions is utilized which does not support CPU and/or memory hotplug. +# Compatibility for determining appropriate sandbox (VM) size: +# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O +# does not yet support sandbox sizing annotations. +# - When running single containers using a tool like ctr, container sizing information will be available. +static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_STRATOVIRT@ + +# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will +# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest. 
+disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@ + +# Enabled experimental feature list, format: ["a", "b"]. +# Experimental features are features not stable enough for production, +# they may break compatibility, and are prepared for a big version bump. +# Supported experimental features: +# (default: []) +experimental = @DEFAULTEXPFEATURES@ + +# If enabled, user can run pprof tools with shim v2 process through kata-monitor. +# (default: false) +#enable_pprof = true diff --git a/src/runtime/hack/update-generated-hypervisor-proto.sh b/src/runtime/hack/update-generated-hypervisor-proto.sh new file mode 100755 index 000000000000..172ea6eefa99 --- /dev/null +++ b/src/runtime/hack/update-generated-hypervisor-proto.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# (C) Copyright IBM Corp. 2022, 2023 +# SPDX-License-Identifier: Apache-2.0 + +set -o errexit -o pipefail -o nounset + +HYPERVISOR_PATH="protocols/hypervisor" + +protoc \ + -I=$GOPATH/src \ + --proto_path=$HYPERVISOR_PATH \ + --go_out=$HYPERVISOR_PATH \ + --go-grpc_out=$HYPERVISOR_PATH \ + $HYPERVISOR_PATH/hypervisor.proto diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index 58faec56c796..a6d2ece25ad7 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ b/src/runtime/pkg/katautils/config-settings.go.in @@ -110,3 +110,6 @@ var defaultRuntimeConfiguration = "@CONFIG_PATH@" const defaultHotPlugVFIO = config.NoPort const defaultColdPlugVFIO = config.NoPort + +const defaultRemoteHypervisorSocket = "/run/peerpod/hypervisor.sock" +const defaultRemoteHypervisorTimeout = 600 diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 1afec55d5076..f7782ed1f0c6 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -52,6 +52,8 @@ const ( qemuHypervisorTableType = "qemu" acrnHypervisorTableType = "acrn" dragonballHypervisorTableType = "dragonball" + stratovirtHypervisorTableType = 
"stratovirt" + remoteHypervisorTableType = "remote" // the maximum amount of PCI bridges that can be cold plugged in a VM maxPCIBridges uint32 = 5 @@ -104,6 +106,7 @@ type hypervisor struct { GuestMemoryDumpPath string `toml:"guest_memory_dump_path"` SeccompSandbox string `toml:"seccompsandbox"` BlockDeviceAIO string `toml:"block_device_aio"` + RemoteHypervisorSocket string `toml:"remote_hypervisor_socket"` HypervisorPathList []string `toml:"valid_hypervisor_paths"` JailerPathList []string `toml:"valid_jailer_paths"` CtlPathList []string `toml:"valid_ctlpaths"` @@ -133,6 +136,7 @@ type hypervisor struct { MemSlots uint32 `toml:"memory_slots"` DefaultBridges uint32 `toml:"default_bridges"` Msize9p uint32 `toml:"msize_9p"` + RemoteHypervisorTimeout uint32 `toml:"remote_hypervisor_timeout"` NumVCPUs float32 `toml:"default_vcpus"` BlockDeviceCacheSet bool `toml:"block_device_cache_set"` BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"` @@ -646,6 +650,20 @@ func (h hypervisor) getIOMMUPlatform() bool { return h.IOMMUPlatform } +func (h hypervisor) getRemoteHypervisorSocket() string { + if h.RemoteHypervisorSocket == "" { + return defaultRemoteHypervisorSocket + } + return h.RemoteHypervisorSocket +} + +func (h hypervisor) getRemoteHypervisorTimeout() uint32 { + if h.RemoteHypervisorTimeout == 0 { + return defaultRemoteHypervisorTimeout + } + return h.RemoteHypervisorTimeout +} + func (a agent) debugConsoleEnabled() bool { return a.DebugConsoleEnabled } @@ -1141,6 +1159,120 @@ func newDragonballHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { }, nil } +func newStratovirtHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { + hypervisor, err := h.path() + if err != nil { + return vc.HypervisorConfig{}, err + } + + kernel, err := h.kernel() + if err != nil { + return vc.HypervisorConfig{}, err + } + + initrd, err := h.initrd() + if err != nil { + return vc.HypervisorConfig{}, err + } + + image, err := h.image() + if err != nil { + return 
vc.HypervisorConfig{}, err + } + + if image != "" && initrd != "" { + return vc.HypervisorConfig{}, + errors.New("having both an image and an initrd defined in the configuration file is not supported") + } + + if image == "" && initrd == "" { + return vc.HypervisorConfig{}, + errors.New("image or initrd must be defined in the configuration file") + } + + rootfsType, err := h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, err + } + + kernelParams := h.kernelParams() + machineType := h.machineType() + + blockDriver, err := h.blockDeviceDriver() + if err != nil { + return vc.HypervisorConfig{}, err + } + + if vSock, err := utils.SupportsVsocks(); !vSock { + return vc.HypervisorConfig{}, err + } + + sharedFS, err := h.sharedFS() + if err != nil { + return vc.HypervisorConfig{}, err + } + + if sharedFS != config.VirtioFS && sharedFS != config.VirtioFSNydus && sharedFS != config.NoSharedFS { + return vc.HypervisorConfig{}, + fmt.Errorf("Stratovirt Hypervisor does not support %s shared filesystem option", sharedFS) + } + + if (sharedFS == config.VirtioFS || sharedFS == config.VirtioFSNydus) && h.VirtioFSDaemon == "" { + return vc.HypervisorConfig{}, + fmt.Errorf("cannot enable %s without daemon path in configuration file", sharedFS) + } + + return vc.HypervisorConfig{ + HypervisorPath: hypervisor, + HypervisorPathList: h.HypervisorPathList, + KernelPath: kernel, + InitrdPath: initrd, + ImagePath: image, + RootfsType: rootfsType, + KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), + HypervisorMachineType: machineType, + NumVCPUsF: h.defaultVCPUs(), + DefaultMaxVCPUs: h.defaultMaxVCPUs(), + MemorySize: h.defaultMemSz(), + MemSlots: h.defaultMemSlots(), + MemOffset: h.defaultMemOffset(), + DefaultMaxMemorySize: h.defaultMaxMemSz(), + EntropySource: h.GetEntropySource(), + DefaultBridges: h.defaultBridges(), + DisableBlockDeviceUse: h.DisableBlockDeviceUse, + SharedFS: sharedFS, + VirtioFSDaemon: h.VirtioFSDaemon, + VirtioFSDaemonList: 
h.VirtioFSDaemonList, + VirtioFSCacheSize: h.VirtioFSCacheSize, + VirtioFSCache: h.defaultVirtioFSCache(), + VirtioFSExtraArgs: h.VirtioFSExtraArgs, + HugePages: h.HugePages, + Debug: h.Debug, + DisableNestingChecks: h.DisableNestingChecks, + BlockDeviceDriver: blockDriver, + DisableVhostNet: true, + GuestHookPath: h.guestHookPath(), + EnableAnnotations: h.EnableAnnotations, + DisableSeccomp: h.DisableSeccomp, + DisableSeLinux: h.DisableSeLinux, + DisableGuestSeLinux: h.DisableGuestSeLinux, + }, nil +} + +func newRemoteHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { + + return vc.HypervisorConfig{ + RemoteHypervisorSocket: h.getRemoteHypervisorSocket(), + RemoteHypervisorTimeout: h.getRemoteHypervisorTimeout(), + DisableGuestSeLinux: true, // The remote hypervisor has a different guest, so Guest SELinux config doesn't work + + // No valid value so avoid to append block device to list in kata_agent.appendDevices + BlockDeviceDriver: "dummy", + EnableAnnotations: h.EnableAnnotations, + GuestHookPath: h.guestHookPath(), + }, nil +} + func newFactoryConfig(f factory) (oci.FactoryConfig, error) { if f.TemplatePath == "" { f.TemplatePath = defaultTemplatePath @@ -1177,6 +1309,12 @@ func updateRuntimeConfigHypervisor(configPath string, tomlConf tomlConfig, confi case dragonballHypervisorTableType: config.HypervisorType = vc.DragonballHypervisor hConfig, err = newDragonballHypervisorConfig(hypervisor) + case stratovirtHypervisorTableType: + config.HypervisorType = vc.StratovirtHypervisor + hConfig, err = newStratovirtHypervisorConfig(hypervisor) + case remoteHypervisorTableType: + config.HypervisorType = vc.RemoteHypervisor + hConfig, err = newRemoteHypervisorConfig(hypervisor) default: err = fmt.Errorf("%s: %+q", errInvalidHypervisorPrefix, k) } @@ -1778,6 +1916,11 @@ func checkFactoryConfig(config oci.RuntimeConfig) error { // checkHypervisorConfig performs basic "sanity checks" on the hypervisor // config. 
func checkHypervisorConfig(config vc.HypervisorConfig) error { + + if config.RemoteHypervisorSocket != "" { + return nil + } + type image struct { path string initrd bool diff --git a/src/runtime/pkg/katautils/config_test.go b/src/runtime/pkg/katautils/config_test.go index 39ccb49f2b74..3ecb1517e5c5 100644 --- a/src/runtime/pkg/katautils/config_test.go +++ b/src/runtime/pkg/katautils/config_test.go @@ -1564,6 +1564,17 @@ func TestCheckHypervisorConfig(t *testing.T) { // reset logger kataUtilsLogger.Logger.Out = savedOut } + + // Check remote hypervisor doesn't error with missing unnescessary config + remoteConfig := vc.HypervisorConfig{ + RemoteHypervisorSocket: "dummy_socket", + ImagePath: "", + InitrdPath: "", + MemorySize: 0, + } + + err := checkHypervisorConfig(remoteConfig) + assert.NoError(err, "remote hypervisor config") } func TestCheckNetNsConfig(t *testing.T) { diff --git a/src/runtime/protocols/hypervisor/hypervisor.pb.go b/src/runtime/protocols/hypervisor/hypervisor.pb.go new file mode 100644 index 000000000000..0195a2c82cd1 --- /dev/null +++ b/src/runtime/protocols/hypervisor/hypervisor.pb.go @@ -0,0 +1,619 @@ +// (C) Copyright IBM Corp. 2022. +// SPDX-License-Identifier: Apache-2.0 + +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.31.0 +// protoc v3.12.4 +// source: hypervisor.proto + +package __ + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. 
+ _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type VersionRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Version string `protobuf:"bytes,1,opt,name=version,proto3" json:"version,omitempty"` +} + +func (x *VersionRequest) Reset() { + *x = VersionRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_hypervisor_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *VersionRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*VersionRequest) ProtoMessage() {} + +func (x *VersionRequest) ProtoReflect() protoreflect.Message { + mi := &file_hypervisor_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use VersionRequest.ProtoReflect.Descriptor instead. 
+func (*VersionRequest) Descriptor() ([]byte, []int) { + return file_hypervisor_proto_rawDescGZIP(), []int{0} +} + +func (x *VersionRequest) GetVersion() string { + if x != nil { + return x.Version + } + return "" +} + +type VersionResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Version string `protobuf:"bytes,1,opt,name=version,proto3" json:"version,omitempty"` +} + +func (x *VersionResponse) Reset() { + *x = VersionResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_hypervisor_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *VersionResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*VersionResponse) ProtoMessage() {} + +func (x *VersionResponse) ProtoReflect() protoreflect.Message { + mi := &file_hypervisor_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use VersionResponse.ProtoReflect.Descriptor instead. 
+func (*VersionResponse) Descriptor() ([]byte, []int) { + return file_hypervisor_proto_rawDescGZIP(), []int{1} +} + +func (x *VersionResponse) GetVersion() string { + if x != nil { + return x.Version + } + return "" +} + +type CreateVMRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + Annotations map[string]string `protobuf:"bytes,2,rep,name=annotations,proto3" json:"annotations,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + NetworkNamespacePath string `protobuf:"bytes,3,opt,name=networkNamespacePath,proto3" json:"networkNamespacePath,omitempty"` +} + +func (x *CreateVMRequest) Reset() { + *x = CreateVMRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_hypervisor_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CreateVMRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CreateVMRequest) ProtoMessage() {} + +func (x *CreateVMRequest) ProtoReflect() protoreflect.Message { + mi := &file_hypervisor_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CreateVMRequest.ProtoReflect.Descriptor instead. 
+func (*CreateVMRequest) Descriptor() ([]byte, []int) { + return file_hypervisor_proto_rawDescGZIP(), []int{2} +} + +func (x *CreateVMRequest) GetId() string { + if x != nil { + return x.Id + } + return "" +} + +func (x *CreateVMRequest) GetAnnotations() map[string]string { + if x != nil { + return x.Annotations + } + return nil +} + +func (x *CreateVMRequest) GetNetworkNamespacePath() string { + if x != nil { + return x.NetworkNamespacePath + } + return "" +} + +type CreateVMResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + AgentSocketPath string `protobuf:"bytes,1,opt,name=agentSocketPath,proto3" json:"agentSocketPath,omitempty"` +} + +func (x *CreateVMResponse) Reset() { + *x = CreateVMResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_hypervisor_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CreateVMResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CreateVMResponse) ProtoMessage() {} + +func (x *CreateVMResponse) ProtoReflect() protoreflect.Message { + mi := &file_hypervisor_proto_msgTypes[3] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CreateVMResponse.ProtoReflect.Descriptor instead. 
+func (*CreateVMResponse) Descriptor() ([]byte, []int) { + return file_hypervisor_proto_rawDescGZIP(), []int{3} +} + +func (x *CreateVMResponse) GetAgentSocketPath() string { + if x != nil { + return x.AgentSocketPath + } + return "" +} + +type StartVMRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` +} + +func (x *StartVMRequest) Reset() { + *x = StartVMRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_hypervisor_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StartVMRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StartVMRequest) ProtoMessage() {} + +func (x *StartVMRequest) ProtoReflect() protoreflect.Message { + mi := &file_hypervisor_proto_msgTypes[4] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StartVMRequest.ProtoReflect.Descriptor instead. 
+func (*StartVMRequest) Descriptor() ([]byte, []int) { + return file_hypervisor_proto_rawDescGZIP(), []int{4} +} + +func (x *StartVMRequest) GetId() string { + if x != nil { + return x.Id + } + return "" +} + +type StartVMResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields +} + +func (x *StartVMResponse) Reset() { + *x = StartVMResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_hypervisor_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StartVMResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StartVMResponse) ProtoMessage() {} + +func (x *StartVMResponse) ProtoReflect() protoreflect.Message { + mi := &file_hypervisor_proto_msgTypes[5] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StartVMResponse.ProtoReflect.Descriptor instead. 
+func (*StartVMResponse) Descriptor() ([]byte, []int) { + return file_hypervisor_proto_rawDescGZIP(), []int{5} +} + +type StopVMRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` +} + +func (x *StopVMRequest) Reset() { + *x = StopVMRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_hypervisor_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StopVMRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StopVMRequest) ProtoMessage() {} + +func (x *StopVMRequest) ProtoReflect() protoreflect.Message { + mi := &file_hypervisor_proto_msgTypes[6] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StopVMRequest.ProtoReflect.Descriptor instead. 
+func (*StopVMRequest) Descriptor() ([]byte, []int) { + return file_hypervisor_proto_rawDescGZIP(), []int{6} +} + +func (x *StopVMRequest) GetId() string { + if x != nil { + return x.Id + } + return "" +} + +type StopVMResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields +} + +func (x *StopVMResponse) Reset() { + *x = StopVMResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_hypervisor_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StopVMResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StopVMResponse) ProtoMessage() {} + +func (x *StopVMResponse) ProtoReflect() protoreflect.Message { + mi := &file_hypervisor_proto_msgTypes[7] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StopVMResponse.ProtoReflect.Descriptor instead. 
+func (*StopVMResponse) Descriptor() ([]byte, []int) { + return file_hypervisor_proto_rawDescGZIP(), []int{7} +} + +var File_hypervisor_proto protoreflect.FileDescriptor + +var file_hypervisor_proto_rawDesc = []byte{ + 0x0a, 0x10, 0x68, 0x79, 0x70, 0x65, 0x72, 0x76, 0x69, 0x73, 0x6f, 0x72, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x12, 0x0a, 0x68, 0x79, 0x70, 0x65, 0x72, 0x76, 0x69, 0x73, 0x6f, 0x72, 0x22, 0x2a, + 0x0a, 0x0e, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x2b, 0x0a, 0x0f, 0x56, 0x65, + 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x18, 0x0a, + 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, + 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0xe5, 0x01, 0x0a, 0x0f, 0x43, 0x72, 0x65, 0x61, + 0x74, 0x65, 0x56, 0x4d, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, + 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x4e, 0x0a, 0x0b, 0x61, + 0x6e, 0x6e, 0x6f, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x2c, 0x2e, 0x68, 0x79, 0x70, 0x65, 0x72, 0x76, 0x69, 0x73, 0x6f, 0x72, 0x2e, 0x43, 0x72, + 0x65, 0x61, 0x74, 0x65, 0x56, 0x4d, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x2e, 0x41, 0x6e, + 0x6e, 0x6f, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0b, + 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x32, 0x0a, 0x14, 0x6e, + 0x65, 0x74, 0x77, 0x6f, 0x72, 0x6b, 0x4e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x50, + 0x61, 0x74, 0x68, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x14, 0x6e, 0x65, 0x74, 0x77, 0x6f, + 0x72, 0x6b, 0x4e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x50, 0x61, 0x74, 0x68, 0x1a, + 0x3e, 0x0a, 
0x10, 0x41, 0x6e, 0x6e, 0x6f, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x45, 0x6e, + 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, + 0x3c, 0x0a, 0x10, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x56, 0x4d, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x12, 0x28, 0x0a, 0x0f, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x53, 0x6f, 0x63, 0x6b, + 0x65, 0x74, 0x50, 0x61, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x61, 0x67, + 0x65, 0x6e, 0x74, 0x53, 0x6f, 0x63, 0x6b, 0x65, 0x74, 0x50, 0x61, 0x74, 0x68, 0x22, 0x20, 0x0a, + 0x0e, 0x53, 0x74, 0x61, 0x72, 0x74, 0x56, 0x4d, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, + 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x22, + 0x11, 0x0a, 0x0f, 0x53, 0x74, 0x61, 0x72, 0x74, 0x56, 0x4d, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x22, 0x1f, 0x0a, 0x0d, 0x53, 0x74, 0x6f, 0x70, 0x56, 0x4d, 0x52, 0x65, 0x71, 0x75, + 0x65, 0x73, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x02, 0x69, 0x64, 0x22, 0x10, 0x0a, 0x0e, 0x53, 0x74, 0x6f, 0x70, 0x56, 0x4d, 0x52, 0x65, 0x73, + 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x32, 0xa4, 0x02, 0x0a, 0x0a, 0x48, 0x79, 0x70, 0x65, 0x72, 0x76, + 0x69, 0x73, 0x6f, 0x72, 0x12, 0x47, 0x0a, 0x08, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x56, 0x4d, + 0x12, 0x1b, 0x2e, 0x68, 0x79, 0x70, 0x65, 0x72, 0x76, 0x69, 0x73, 0x6f, 0x72, 0x2e, 0x43, 0x72, + 0x65, 0x61, 0x74, 0x65, 0x56, 0x4d, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1c, 0x2e, + 0x68, 0x79, 0x70, 0x65, 0x72, 0x76, 0x69, 0x73, 0x6f, 0x72, 0x2e, 0x43, 0x72, 0x65, 0x61, 0x74, + 0x65, 0x56, 0x4d, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x44, 0x0a, + 0x07, 0x53, 0x74, 0x61, 0x72, 0x74, 0x56, 0x4d, 
0x12, 0x1a, 0x2e, 0x68, 0x79, 0x70, 0x65, 0x72, + 0x76, 0x69, 0x73, 0x6f, 0x72, 0x2e, 0x53, 0x74, 0x61, 0x72, 0x74, 0x56, 0x4d, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1b, 0x2e, 0x68, 0x79, 0x70, 0x65, 0x72, 0x76, 0x69, 0x73, 0x6f, + 0x72, 0x2e, 0x53, 0x74, 0x61, 0x72, 0x74, 0x56, 0x4d, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x06, 0x53, 0x74, 0x6f, 0x70, 0x56, 0x4d, 0x12, 0x19, 0x2e, + 0x68, 0x79, 0x70, 0x65, 0x72, 0x76, 0x69, 0x73, 0x6f, 0x72, 0x2e, 0x53, 0x74, 0x6f, 0x70, 0x56, + 0x4d, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1a, 0x2e, 0x68, 0x79, 0x70, 0x65, 0x72, + 0x76, 0x69, 0x73, 0x6f, 0x72, 0x2e, 0x53, 0x74, 0x6f, 0x70, 0x56, 0x4d, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x44, 0x0a, 0x07, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, + 0x6e, 0x12, 0x1a, 0x2e, 0x68, 0x79, 0x70, 0x65, 0x72, 0x76, 0x69, 0x73, 0x6f, 0x72, 0x2e, 0x56, + 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1b, 0x2e, + 0x68, 0x79, 0x70, 0x65, 0x72, 0x76, 0x69, 0x73, 0x6f, 0x72, 0x2e, 0x56, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x42, 0x04, 0x5a, 0x02, + 0x2e, 0x2f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_hypervisor_proto_rawDescOnce sync.Once + file_hypervisor_proto_rawDescData = file_hypervisor_proto_rawDesc +) + +func file_hypervisor_proto_rawDescGZIP() []byte { + file_hypervisor_proto_rawDescOnce.Do(func() { + file_hypervisor_proto_rawDescData = protoimpl.X.CompressGZIP(file_hypervisor_proto_rawDescData) + }) + return file_hypervisor_proto_rawDescData +} + +var file_hypervisor_proto_msgTypes = make([]protoimpl.MessageInfo, 9) +var file_hypervisor_proto_goTypes = []interface{}{ + (*VersionRequest)(nil), // 0: hypervisor.VersionRequest + (*VersionResponse)(nil), // 1: hypervisor.VersionResponse + (*CreateVMRequest)(nil), // 2: hypervisor.CreateVMRequest + (*CreateVMResponse)(nil), // 
3: hypervisor.CreateVMResponse + (*StartVMRequest)(nil), // 4: hypervisor.StartVMRequest + (*StartVMResponse)(nil), // 5: hypervisor.StartVMResponse + (*StopVMRequest)(nil), // 6: hypervisor.StopVMRequest + (*StopVMResponse)(nil), // 7: hypervisor.StopVMResponse + nil, // 8: hypervisor.CreateVMRequest.AnnotationsEntry +} +var file_hypervisor_proto_depIdxs = []int32{ + 8, // 0: hypervisor.CreateVMRequest.annotations:type_name -> hypervisor.CreateVMRequest.AnnotationsEntry + 2, // 1: hypervisor.Hypervisor.CreateVM:input_type -> hypervisor.CreateVMRequest + 4, // 2: hypervisor.Hypervisor.StartVM:input_type -> hypervisor.StartVMRequest + 6, // 3: hypervisor.Hypervisor.StopVM:input_type -> hypervisor.StopVMRequest + 0, // 4: hypervisor.Hypervisor.Version:input_type -> hypervisor.VersionRequest + 3, // 5: hypervisor.Hypervisor.CreateVM:output_type -> hypervisor.CreateVMResponse + 5, // 6: hypervisor.Hypervisor.StartVM:output_type -> hypervisor.StartVMResponse + 7, // 7: hypervisor.Hypervisor.StopVM:output_type -> hypervisor.StopVMResponse + 1, // 8: hypervisor.Hypervisor.Version:output_type -> hypervisor.VersionResponse + 5, // [5:9] is the sub-list for method output_type + 1, // [1:5] is the sub-list for method input_type + 1, // [1:1] is the sub-list for extension type_name + 1, // [1:1] is the sub-list for extension extendee + 0, // [0:1] is the sub-list for field type_name +} + +func init() { file_hypervisor_proto_init() } +func file_hypervisor_proto_init() { + if File_hypervisor_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_hypervisor_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*VersionRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_hypervisor_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*VersionResponse); i { + case 0: + return &v.state + case 1: + return 
&v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_hypervisor_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CreateVMRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_hypervisor_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CreateVMResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_hypervisor_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*StartVMRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_hypervisor_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*StartVMResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_hypervisor_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*StopVMRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_hypervisor_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*StopVMResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_hypervisor_proto_rawDesc, + NumEnums: 0, + NumMessages: 9, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_hypervisor_proto_goTypes, + DependencyIndexes: file_hypervisor_proto_depIdxs, + MessageInfos: 
file_hypervisor_proto_msgTypes, + }.Build() + File_hypervisor_proto = out.File + file_hypervisor_proto_rawDesc = nil + file_hypervisor_proto_goTypes = nil + file_hypervisor_proto_depIdxs = nil +} diff --git a/src/runtime/protocols/hypervisor/hypervisor.proto b/src/runtime/protocols/hypervisor/hypervisor.proto new file mode 100644 index 000000000000..bc8a87b2ef2f --- /dev/null +++ b/src/runtime/protocols/hypervisor/hypervisor.proto @@ -0,0 +1,48 @@ +// (C) Copyright IBM Corp. 2022. +// SPDX-License-Identifier: Apache-2.0 + +syntax = "proto3"; + +option go_package = "./"; + +package hypervisor; + +service Hypervisor { + rpc CreateVM(CreateVMRequest) returns (CreateVMResponse) {} + rpc StartVM(StartVMRequest) returns (StartVMResponse) {} + rpc StopVM(StopVMRequest) returns (StopVMResponse) {} + rpc Version(VersionRequest) returns (VersionResponse) {} +} + + +message VersionRequest { + string version = 1; +} + +message VersionResponse { + string version = 1; +} + +message CreateVMRequest { + string id = 1; + map annotations = 2; + string networkNamespacePath = 3; +} + +message CreateVMResponse { + string agentSocketPath = 1; +} + +message StartVMRequest { + string id = 1; +} + +message StartVMResponse { +} + +message StopVMRequest { + string id = 1; +} + +message StopVMResponse { +} diff --git a/src/runtime/protocols/hypervisor/hypervisor_grpc.pb.go b/src/runtime/protocols/hypervisor/hypervisor_grpc.pb.go new file mode 100644 index 000000000000..96ec8df88913 --- /dev/null +++ b/src/runtime/protocols/hypervisor/hypervisor_grpc.pb.go @@ -0,0 +1,223 @@ +// (C) Copyright IBM Corp. 2022. +// SPDX-License-Identifier: Apache-2.0 + +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. 
+// versions: +// - protoc-gen-go-grpc v1.3.0 +// - protoc v3.12.4 +// source: hypervisor.proto + +package __ + +import ( + context "context" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.32.0 or later. +const _ = grpc.SupportPackageIsVersion7 + +const ( + Hypervisor_CreateVM_FullMethodName = "/hypervisor.Hypervisor/CreateVM" + Hypervisor_StartVM_FullMethodName = "/hypervisor.Hypervisor/StartVM" + Hypervisor_StopVM_FullMethodName = "/hypervisor.Hypervisor/StopVM" + Hypervisor_Version_FullMethodName = "/hypervisor.Hypervisor/Version" +) + +// HypervisorClient is the client API for Hypervisor service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +type HypervisorClient interface { + CreateVM(ctx context.Context, in *CreateVMRequest, opts ...grpc.CallOption) (*CreateVMResponse, error) + StartVM(ctx context.Context, in *StartVMRequest, opts ...grpc.CallOption) (*StartVMResponse, error) + StopVM(ctx context.Context, in *StopVMRequest, opts ...grpc.CallOption) (*StopVMResponse, error) + Version(ctx context.Context, in *VersionRequest, opts ...grpc.CallOption) (*VersionResponse, error) +} + +type hypervisorClient struct { + cc grpc.ClientConnInterface +} + +func NewHypervisorClient(cc grpc.ClientConnInterface) HypervisorClient { + return &hypervisorClient{cc} +} + +func (c *hypervisorClient) CreateVM(ctx context.Context, in *CreateVMRequest, opts ...grpc.CallOption) (*CreateVMResponse, error) { + out := new(CreateVMResponse) + err := c.cc.Invoke(ctx, Hypervisor_CreateVM_FullMethodName, in, out, opts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + +func (c *hypervisorClient) StartVM(ctx context.Context, in *StartVMRequest, opts ...grpc.CallOption) (*StartVMResponse, error) { + out := new(StartVMResponse) + err := c.cc.Invoke(ctx, Hypervisor_StartVM_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *hypervisorClient) StopVM(ctx context.Context, in *StopVMRequest, opts ...grpc.CallOption) (*StopVMResponse, error) { + out := new(StopVMResponse) + err := c.cc.Invoke(ctx, Hypervisor_StopVM_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *hypervisorClient) Version(ctx context.Context, in *VersionRequest, opts ...grpc.CallOption) (*VersionResponse, error) { + out := new(VersionResponse) + err := c.cc.Invoke(ctx, Hypervisor_Version_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +// HypervisorServer is the server API for Hypervisor service. +// All implementations must embed UnimplementedHypervisorServer +// for forward compatibility +type HypervisorServer interface { + CreateVM(context.Context, *CreateVMRequest) (*CreateVMResponse, error) + StartVM(context.Context, *StartVMRequest) (*StartVMResponse, error) + StopVM(context.Context, *StopVMRequest) (*StopVMResponse, error) + Version(context.Context, *VersionRequest) (*VersionResponse, error) + mustEmbedUnimplementedHypervisorServer() +} + +// UnimplementedHypervisorServer must be embedded to have forward compatible implementations. 
+type UnimplementedHypervisorServer struct { +} + +func (UnimplementedHypervisorServer) CreateVM(context.Context, *CreateVMRequest) (*CreateVMResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method CreateVM not implemented") +} +func (UnimplementedHypervisorServer) StartVM(context.Context, *StartVMRequest) (*StartVMResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method StartVM not implemented") +} +func (UnimplementedHypervisorServer) StopVM(context.Context, *StopVMRequest) (*StopVMResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method StopVM not implemented") +} +func (UnimplementedHypervisorServer) Version(context.Context, *VersionRequest) (*VersionResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method Version not implemented") +} +func (UnimplementedHypervisorServer) mustEmbedUnimplementedHypervisorServer() {} + +// UnsafeHypervisorServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to HypervisorServer will +// result in compilation errors. 
+type UnsafeHypervisorServer interface { + mustEmbedUnimplementedHypervisorServer() +} + +func RegisterHypervisorServer(s grpc.ServiceRegistrar, srv HypervisorServer) { + s.RegisterService(&Hypervisor_ServiceDesc, srv) +} + +func _Hypervisor_CreateVM_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(CreateVMRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(HypervisorServer).CreateVM(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: Hypervisor_CreateVM_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(HypervisorServer).CreateVM(ctx, req.(*CreateVMRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Hypervisor_StartVM_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(StartVMRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(HypervisorServer).StartVM(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: Hypervisor_StartVM_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(HypervisorServer).StartVM(ctx, req.(*StartVMRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Hypervisor_StopVM_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(StopVMRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(HypervisorServer).StopVM(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: Hypervisor_StopVM_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + 
return srv.(HypervisorServer).StopVM(ctx, req.(*StopVMRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Hypervisor_Version_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(VersionRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(HypervisorServer).Version(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: Hypervisor_Version_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(HypervisorServer).Version(ctx, req.(*VersionRequest)) + } + return interceptor(ctx, in, info, handler) +} + +// Hypervisor_ServiceDesc is the grpc.ServiceDesc for Hypervisor service. +// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var Hypervisor_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "hypervisor.Hypervisor", + HandlerType: (*HypervisorServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "CreateVM", + Handler: _Hypervisor_CreateVM_Handler, + }, + { + MethodName: "StartVM", + Handler: _Hypervisor_StartVM_Handler, + }, + { + MethodName: "StopVM", + Handler: _Hypervisor_StopVM_Handler, + }, + { + MethodName: "Version", + Handler: _Hypervisor_Version_Handler, + }, + }, + Streams: []grpc.StreamDesc{}, + Metadata: "hypervisor.proto", +} diff --git a/src/runtime/virtcontainers/fs_share_linux.go b/src/runtime/virtcontainers/fs_share_linux.go index 5bafb9e40302..029206f1fce7 100644 --- a/src/runtime/virtcontainers/fs_share_linux.go +++ b/src/runtime/virtcontainers/fs_share_linux.go @@ -29,6 +29,29 @@ import ( "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" ) +// Splitting Regex pattern: +// configVolRegex: Regex to match directory structure for k8's volume mounts. 
+// Use regex for strict matching instead of strings.Contains +// match for kubernetes.io~configmap, kubernetes.io~secret, kubernetes.io~projected, kubernetes.io~downward-api +// as recommended in review comments for PR #7211 + +// Example directory structure for the volume mounts. +// /var/lib/kubelet/pods/f51ae853-557e-4ce1-b60b-a1101b555612/volumes/kubernetes.io~configmap +// /var/lib/kubelet/pods/f51ae853-557e-4ce1-b60b-a1101b555612/volumes/kubernetes.io~secret +// /var/lib/kubelet/pods/f51ae853-557e-4ce1-b60b-a1101b555612/volumes/kubernetes.io~projected +// /var/lib/kubelet/pods/f51ae853-557e-4ce1-b60b-a1101b555612/volumes/kubernetes.io~downward-api +var configVolRegexString = "^/var/lib/kubelet/pods/[a-fA-F0-9\\-]{36}/volumes/kubernetes\\.io~(configmap|secret|projected|downward-api)" +var configVolRegex = regexp.MustCompile(configVolRegexString) + +// timestampDirRegex: Regex to match only the timestamped directory inside the above volume mount +// Regex for the temp directory with timestamp that is used to handle the updates by K8s +// Examples +// /var/lib/kubelet/pods/e33907eb-54c7-4113-a3dc-447f247084cc/volumes/kubernetes.io~secret/foosecret/..2023_07_27_07_13_00.1257228 +// /var/lib/kubelet/pods/e33907eb-54c7-4113-a3dc-447f247084cc/volumes/kubernetes.io~downward-api/fooinfo/..2023_07_27_07_13_00.3704578339 +// The timestamp is of the format 2023_07_27_07_13_00.3704578339 or 2023_07_27_07_13_00.1257228 +var timestampDirRegexString = ".*[0-9]{4}_[0-9]{2}_[0-9]{2}_[0-9]{2}_[0-9]{2}_[0-9]{2}.[0-9]+$" +var timestampDirRegex = regexp.MustCompile(configVolRegexString + timestampDirRegexString) + func unmountNoFollow(path string) error { return syscall.Unmount(path, syscall.MNT_DETACH|UmountNoFollow) } @@ -293,27 +316,14 @@ func (f *FilesystemShare) ShareFile(ctx context.Context, c *Container, m *Mount) return err } - // Add fsNotify watcher for volume mounts - // Use regex for strict matching instead of strings.Contains - // match for kubernetes.io~configmap, 
kubernetes.io~secret, kubernetes.io~projected, kubernetes.io~downward-api - // as recommended in review comments for PR #7211 - - // Example directory structure for the volume mounts. - // /var/lib/kubelet/pods/f51ae853-557e-4ce1-b60b-a1101b555612/volumes/kubernetes.io~configmap - // /var/lib/kubelet/pods/f51ae853-557e-4ce1-b60b-a1101b555612/volumes/kubernetes.io~secret - // /var/lib/kubelet/pods/f51ae853-557e-4ce1-b60b-a1101b555612/volumes/kubernetes.io~projected - // /var/lib/kubelet/pods/f51ae853-557e-4ce1-b60b-a1101b555612/volumes/kubernetes.io~downward-api - - // More relaxed regex for the pod UID - // `^/var/lib/kubelet/pods/[a-fA-F0-9\-]+/volumes/kubernetes\.io~(configmap|secret|projected|downward-api)` - - //TODO: Move this to a global variable and compile only once. - regex := regexp.MustCompile(`^/var/lib/kubelet/pods/[a-fA-F0-9\-]{36}/volumes/kubernetes\.io~(configmap|secret|projected|downward-api)`) - if regex.MatchString(srcPath) { + if configVolRegex.MatchString(srcPath) { // fsNotify doesn't add watcher recursively. // So we need to add the watcher for directories under kubernetes.io~configmap, kubernetes.io~secret, // kubernetes.io~downward-api and kubernetes.io~projected - if info.Mode().IsDir() { + + // Add watcher only to the timestamped directory containing secrets to prevent + // multiple events received from also watching the parent directory. 
+ if info.Mode().IsDir() && timestampDirRegex.MatchString(srcPath) { // The cm dir is of the form /var/lib/kubelet/pods//volumes/kubernetes.io~configmap/foo/{..data, key1, key2,...} // The secret dir is of the form /var/lib/kubelet/pods//volumes/kubernetes.io~secret/foo/{..data, key1, key2,...} // The projected dir is of the form /var/lib/kubelet/pods//volumes/kubernetes.io~projected/foo/{..data, key1, key2,...} @@ -325,7 +335,7 @@ func (f *FilesystemShare) ShareFile(ctx context.Context, c *Container, m *Mount) return err } } else { - f.Logger().Infof("ShareFile: srcPath(%s) is not a directory", srcPath) + f.Logger().Infof("ShareFile: srcPath(%s) is not a timestamped directory", srcPath) } // Add the source and destination to the global map which will be used by the event loop // to copy the modified content to the destination @@ -335,7 +345,6 @@ func (f *FilesystemShare) ShareFile(ctx context.Context, c *Container, m *Mount) defer f.srcDstMapLock.Unlock() f.srcDstMap[srcPath] = append(f.srcDstMap[srcPath], dstPath) } - return nil } @@ -637,14 +646,6 @@ func (f *FilesystemShare) StartFileEventWatcher(ctx context.Context) error { f.eventLoopStarted = true f.eventLoopStartedLock.Unlock() - // Regex for the temp directory with timestamp that is used to handle the updates by K8s - // Examples - // /var/lib/kubelet/pods/e33907eb-54c7-4113-a3dc-447f247084cc/volumes/kubernetes.io~secret/foosecret/..2023_07_27_07_13_00.1257228 - // /var/lib/kubelet/pods/e33907eb-54c7-4113-a3dc-447f247084cc/volumes/kubernetes.io~downward-api/fooinfo/..2023_07_27_07_13_00.3704578339 - // The timestamp is of the format 2023_07_27_07_13_00.3704578339 or 2023_07_27_07_13_00.1257228 - - var re = regexp.MustCompile(`(?m)\s*[0-9]{4}_[0-9]{2}_[0-9]{2}_[0-9]{2}_[0-9]{2}_[0-9]{2}.[0-9]*$`) - f.Logger().Debugf("StartFileEventWatcher: srcDstMap dump %v", f.srcDstMap) for { @@ -701,7 +702,7 @@ func (f *FilesystemShare) StartFileEventWatcher(ctx context.Context) error { source := event.Name 
f.Logger().Infof("StartFileEventWatcher: source for the event: %s", source) - if re.FindString(source) != "" { + if timestampDirRegex.FindString(source) != "" { // This block will be entered when the timestamped directory is removed. // This also indicates that foo/..data contains the updated info @@ -720,7 +721,7 @@ func (f *FilesystemShare) StartFileEventWatcher(ctx context.Context) error { f.Logger().Infof("StartFileEventWatcher: Copy file from src (%s) to dst (%s)", dataDir, destination) // We explicitly ignore any errors here. Copy will continue for other files // Errors are logged in the copyFilesFromDataDir method - _ = f.copyFilesFromDataDir(dataDir, destination) + _ = f.copyUpdatedFiles(dataDir, destination, source) } f.srcDstMapLock.Unlock() } @@ -740,71 +741,103 @@ func (f *FilesystemShare) StartFileEventWatcher(ctx context.Context) error { } } -func (f *FilesystemShare) copyFilesFromDataDir(src, dst string) error { - - // The src is a symlink and is of the following form: - // /var/lib/kubelet/pods//volumes//foo/..data - // eg, for configmap, src = /var/lib/kubelet/pods/b44e3261-7cf0-48d3-83b4-6094bba95dc8/volumes/kubernetes.io~configmap/foo/..data - // The dst is of the following form: - // /run/kata-containers/shared/containers/-/..data - // eg. dst = /run/kata-containers/shared/containers/e70739a6cc38daf15de916b4d22aad035d42bc977024f2c8cae6b0b607251d44-39407b03e4b448f1-config-volume/..data +func (f *FilesystemShare) copyUpdatedFiles(src, dst, oldtsDir string) error { + f.Logger().Infof("copyUpdatedFiles: Copy src:%s to dst:%s from old src:%s", src, dst, oldtsDir) + // 1. Read the symlink and get the actual data directory // Get the symlink target // eg. 
srcdir = ..2023_02_09_06_40_51.2326009790 - srcdir, err := os.Readlink(src) + srcnewtsdir, err := os.Readlink(src) if err != nil { - f.Logger().Infof("copyFilesFromDataDir: Reading data symlink returned error (%v)", err) + f.Logger().WithError(err).Errorf("copyUpdatedFiles: Reading data symlink %s returned error", src) return err } - // Get the base directory path of src - volumeDir := filepath.Dir(src) - // eg. volumeDir = /var/lib/kubelet/pods/b44e3261-7cf0-48d3-83b4-6094bba95dc8/volumes/kubernetes.io~configmap/foo + // 2. Construct the path to new timestamped directory in host + srcBasePath := filepath.Dir(src) + srcNewTsPath := filepath.Join(srcBasePath, srcnewtsdir) - dataDir := filepath.Join(volumeDir, srcdir) - // eg. dataDir = /var/lib/kubelet/pods/b44e3261-7cf0-48d3-83b4-6094bba95dc8/volumes/kubernetes.io~configmap/foo/..2023_02_09_06_40_51.2326009790 + // 3. Construct the path to copy new timestamped directory in guest + dstBasePath := filepath.Dir(dst) + dstNewTsPath := filepath.Join(dstBasePath, srcnewtsdir) - f.Logger().Infof("copyFilesFromDataDir: full path to data symlink (%s)", dataDir) + // 4. Create a hashmap to add newly added secrets (not present in the old ts directory) + // for creating user visible symlinks + newSecrets := make(map[string]string) - // Using WalkDir is more efficient than Walk - err = filepath.WalkDir(dataDir, - func(path string, d fs.DirEntry, err error) error { - if err != nil { - f.Logger().Infof("copyFilesFromDataDir: Error in file walk %v", err) - return err - } + f.Logger().Infof("copyUpdatedFiles: new src dir: %s && new dst dir:%s", srcNewTsPath, dstNewTsPath) - // eg. 
path = /var/lib/kubelet/pods/b44e3261-7cf0-48d3-83b4-6094bba95dc8/volumes/kubernetes.io~configmap/foo/..2023_02_09_06_40_51.2326009790/{key1, key2, ...} - f.Logger().Infof("copyFilesFromDataDir: path (%s)", path) - if !d.IsDir() { - // Using filePath.Rel to handle these cases - // /var/lib/kubelet/pods/2481b69e-9ac8-475a-9e11-88af1daca60e/volumes/kubernetes.io~projected/all-in-one/..2023_02_13_12_35_49.1380323032/config-dir1/config.file1 - // /var/lib/kubelet/pods/2481b69e-9ac8-475a-9e11-88af1daca60e/volumes/kubernetes.io~projected/all-in-one/..2023_02_13_12_35_49.1380323032/config.file2 - rel, err := filepath.Rel(dataDir, path) - if err != nil { - f.Logger().Infof("copyFilesFromDataDir: Unable to get relative path") - return err - } - f.Logger().Debugf("copyFilesFromDataDir: dataDir(%s), path(%s), rel(%s)", dataDir, path, rel) - // Form the destination path in the guest - dstFile := filepath.Join(dst, rel) - f.Logger().Infof("copyFilesFromDataDir: Copying file %s to dst %s", path, dstFile) - err = f.sandbox.agent.copyFile(context.Background(), path, dstFile) - if err != nil { - f.Logger().Infof("copyFilesFromDataDir: Error in copying file %v", err) - return err - } - f.Logger().Infof("copyFilesFromDataDir: Successfully copied file (%s)", path) + // 5. Copy all the files from the new timestamped directory to the guest + walk := func(srcPath string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + + info, err := d.Info() + if err != nil { + return err + } + dstPath := dstNewTsPath + if !info.Mode().IsDir() { + // Construct the path for the files to be copied to. + dstPath = filepath.Join(dstPath, filepath.Base(srcPath)) + + // Determine if this secret was present in the old timestamped directory. + // If not, add it to the newSecrets map to create user visible symlinks. 
+ oldSecret := filepath.Join(oldtsDir, filepath.Base(srcPath)) + if _, ok := f.srcDstMap[oldSecret]; !ok { + // these are symlinks to '..data' inside the k8's volume + symlinkSrc := filepath.Join(filepath.Dir(srcNewTsPath), filepath.Base(srcPath)) + symlinkDst := filepath.Join(filepath.Dir(dstNewTsPath), filepath.Base(srcPath)) + newSecrets[symlinkSrc] = symlinkDst } - return nil - }) + } + + err = f.sandbox.agent.copyFile(context.Background(), srcPath, dstPath) + if err != nil { + f.Logger().WithError(err).Error("Failed to copy file") + return err + } + // Create a new entry in the globalMap to be used in the event loop + f.Logger().Infof("copyUpdatedFiles: Adding srcPath(%s) dstPath(%s) to srcDstMap", srcPath, dstPath) + f.srcDstMap[srcPath] = append(f.srcDstMap[srcPath], dstPath) + return nil + } + + if err := filepath.WalkDir(srcNewTsPath, walk); err != nil { + f.Logger().WithError(err).Error("copyUpdatedFiles: failed to copy files.") + return err + } + + // 6. Add watcher to the new timestamped directory in host + err = f.watchDir(srcNewTsPath) + if err != nil { + f.Logger().WithError(err).Error("copyUpdatedFiles: Failed to add watcher on new ts source.") + return err + } + + // 7. Update the '..data' symlink to fix user visible files + srcDataPath := filepath.Join(filepath.Dir(srcNewTsPath), "..data") + dstDataPath := filepath.Join(filepath.Dir(dstNewTsPath), "..data") + err = f.sandbox.agent.copyFile(context.Background(), srcDataPath, dstDataPath) if err != nil { - f.Logger().Infof("copyFilesFromDataDir: Error in filepath.WalkDir (%v)", err) + f.Logger().WithError(err).Errorf("copyUpdatedFiles: Failed to update data symlink") return err } - f.Logger().Infof("copyFilesFromDataDir: Done") + // 8. Create user visible symlinks for any newly created secrets + // For existing secrets, the update to '..data' symlink above will fix the user visible files. 
+ // TODO: For deleted secrets, the existing symlink will point to non-existing entity after + // update to '..data' symlink. Since there is NO DELETE-API in agent, the symlinks will exist + for k, v := range newSecrets { + err = f.sandbox.agent.copyFile(context.Background(), k, v) + if err != nil { + f.Logger().WithError(err).Error("copyUpdatedFiles: Failed to copy newly created secret") + return err + } + } + return nil } diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 63d391336b06..7c16064e1ff3 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -48,12 +48,18 @@ const ( // ClhHypervisor is the ICH hypervisor. ClhHypervisor HypervisorType = "clh" + // StratovirtHypervisor is the StratoVirt hypervisor. + StratovirtHypervisor HypervisorType = "stratovirt" + // DragonballHypervisor is the Dragonball hypervisor. DragonballHypervisor HypervisorType = "dragonball" // VirtFrameworkHypervisor is the Darwin Virtualization.framework hypervisor VirtframeworkHypervisor HypervisorType = "virtframework" + // RemoteHypervisor is the Remote hypervisor. + RemoteHypervisor HypervisorType = "remote" + // MockHypervisor is a mock hypervisor for testing purposes MockHypervisor HypervisorType = "mock" @@ -237,6 +243,9 @@ func (hType *HypervisorType) Set(value string) error { case "virtframework": *hType = VirtframeworkHypervisor return nil + case "remote": + *hType = RemoteHypervisor + return nil case "mock": *hType = MockHypervisor return nil @@ -256,6 +265,10 @@ func (hType *HypervisorType) String() string { return string(AcrnHypervisor) case ClhHypervisor: return string(ClhHypervisor) + case StratovirtHypervisor: + return string(StratovirtHypervisor) + case RemoteHypervisor: + return string(RemoteHypervisor) case MockHypervisor: return string(MockHypervisor) default: @@ -450,6 +463,15 @@ type HypervisorConfig struct { // BlockiDeviceAIO specifies the I/O API to be used. 
BlockDeviceAIO string + // The socket to connect to the remote hypervisor implementation on + RemoteHypervisorSocket string + + // The name of the sandbox (pod) + SandboxName string + + // The name of the namespace of the sandbox (pod) + SandboxNamespace string + // The user maps to the uid. User string @@ -558,6 +580,9 @@ type HypervisorConfig struct { // Group ID. Gid uint32 + // Timeout for actions e.g. startVM for the remote hypervisor + RemoteHypervisorTimeout uint32 + // BlockDeviceCacheSet specifies cache-related options will be set to block devices or not. BlockDeviceCacheSet bool diff --git a/src/runtime/virtcontainers/hypervisor_config_darwin.go b/src/runtime/virtcontainers/hypervisor_config_darwin.go index a949adf3a73c..1225271a2a4c 100644 --- a/src/runtime/virtcontainers/hypervisor_config_darwin.go +++ b/src/runtime/virtcontainers/hypervisor_config_darwin.go @@ -11,6 +11,10 @@ import ( func validateHypervisorConfig(conf *HypervisorConfig) error { + if conf.RemoteHypervisorSocket != "" { + return nil + } + if conf.KernelPath == "" { return fmt.Errorf("Missing kernel path") } diff --git a/src/runtime/virtcontainers/hypervisor_config_linux.go b/src/runtime/virtcontainers/hypervisor_config_linux.go index 8e34f98b5bc6..1bcd47218c3c 100644 --- a/src/runtime/virtcontainers/hypervisor_config_linux.go +++ b/src/runtime/virtcontainers/hypervisor_config_linux.go @@ -13,6 +13,10 @@ import ( func validateHypervisorConfig(conf *HypervisorConfig) error { + if conf.RemoteHypervisorSocket != "" { + return nil + } + if conf.KernelPath == "" { return fmt.Errorf("Missing kernel path") } diff --git a/src/runtime/virtcontainers/hypervisor_config_test.go b/src/runtime/virtcontainers/hypervisor_config_test.go index 49558f6a97a9..e51773eaafee 100644 --- a/src/runtime/virtcontainers/hypervisor_config_test.go +++ b/src/runtime/virtcontainers/hypervisor_config_test.go @@ -28,3 +28,12 @@ func TestHypervisorConfigNoKernelPath(t *testing.T) { testHypervisorConfigValid(t, 
hypervisorConfig, false) } + +func TestRemoteHypervisorConfigNoKernelPath(t *testing.T) { + hypervisorConfig := &HypervisorConfig{ + RemoteHypervisorSocket: "dummy_socket", + KernelPath: "", + } + + testHypervisorConfigValid(t, hypervisorConfig, true) +} diff --git a/src/runtime/virtcontainers/hypervisor_linux.go b/src/runtime/virtcontainers/hypervisor_linux.go index f419e0982f57..ba5c38881116 100644 --- a/src/runtime/virtcontainers/hypervisor_linux.go +++ b/src/runtime/virtcontainers/hypervisor_linux.go @@ -36,8 +36,12 @@ func NewHypervisor(hType HypervisorType) (Hypervisor, error) { return &Acrn{}, nil case ClhHypervisor: return &cloudHypervisor{}, nil + case StratovirtHypervisor: + return &stratovirt{}, nil case DragonballHypervisor: return &mockHypervisor{}, nil + case RemoteHypervisor: + return &remoteHypervisor{}, nil case MockHypervisor: return &mockHypervisor{}, nil default: diff --git a/src/runtime/virtcontainers/hypervisor_test.go b/src/runtime/virtcontainers/hypervisor_test.go index 19bfdf4773b1..64794249f8df 100644 --- a/src/runtime/virtcontainers/hypervisor_test.go +++ b/src/runtime/virtcontainers/hypervisor_test.go @@ -7,11 +7,12 @@ package virtcontainers import ( "fmt" - "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" - "github.com/stretchr/testify/assert" "os" "strings" "testing" + + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" + "github.com/stretchr/testify/assert" ) func TestGetKernelRootParams(t *testing.T) { @@ -186,6 +187,10 @@ func TestSetMockHypervisorType(t *testing.T) { testSetHypervisorType(t, "mock", MockHypervisor) } +func TestSetRemoteHypervisorType(t *testing.T) { + testSetHypervisorType(t, "remote", RemoteHypervisor) +} + func TestSetUnknownHypervisorType(t *testing.T) { var hypervisorType HypervisorType assert := assert.New(t) @@ -207,6 +212,11 @@ func TestStringFromQemuHypervisorType(t *testing.T) { testStringFromHypervisorType(t, hypervisorType, "qemu") } +func 
TestStringFromRemoteHypervisorType(t *testing.T) { + hypervisorType := RemoteHypervisor + testStringFromHypervisorType(t, hypervisorType, "remote") +} + func TestStringFromMockHypervisorType(t *testing.T) { hypervisorType := MockHypervisor testStringFromHypervisorType(t, hypervisorType, "mock") @@ -224,6 +234,12 @@ func testNewHypervisorFromHypervisorType(t *testing.T, hypervisorType Hypervisor assert.Exactly(hy, expected) } +func TestNewHypervisorFromRemoteHypervisorType(t *testing.T) { + hypervisorType := RemoteHypervisor + expectedHypervisor := &remoteHypervisor{} + testNewHypervisorFromHypervisorType(t, hypervisorType, expectedHypervisor) +} + func TestNewHypervisorFromMockHypervisorType(t *testing.T) { hypervisorType := MockHypervisor expectedHypervisor := &mockHypervisor{} diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 9bb9f138fa9d..892aa534b79e 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -78,9 +78,13 @@ const ( defaultSeLinuxContainerType = "container_t" ) +type customRequestTimeoutKeyType struct{} + var ( checkRequestTimeout = 30 * time.Second defaultRequestTimeout = 60 * time.Second + remoteRequestTimeout = 300 * time.Second + customRequestTimeoutKey = customRequestTimeoutKeyType(struct{}{}) errorMissingOCISpec = errors.New("Missing OCI specification") defaultKataHostSharedDir = "/run/kata-containers/shared/sandboxes/" defaultKataGuestSharedDir = "/run/kata-containers/shared/containers/" @@ -376,6 +380,8 @@ func (k *kataAgent) agentURL() (string, error) { return s.String(), nil case types.HybridVSock: return s.String(), nil + case types.RemoteSock: + return s.String(), nil case types.MockHybridVSock: return s.String(), nil default: @@ -426,6 +432,7 @@ func (k *kataAgent) configure(ctx context.Context, h Hypervisor, id, sharePath s if err != nil { return err } + case types.RemoteSock: case types.MockHybridVSock: default: return 
types.ErrInvalidConfigType @@ -745,37 +752,43 @@ func (k *kataAgent) startSandbox(ctx context.Context, sandbox *Sandbox) error { return err } - // Check grpc server is serving - if err = k.check(ctx); err != nil { - return err - } + var kmodules []*grpc.KernelModule - // If a Policy has been specified, send it to the agent. - if len(sandbox.config.AgentConfig.Policy) > 0 { - if err := sandbox.agent.setPolicy(ctx, sandbox.config.AgentConfig.Policy); err != nil { + if sandbox.config.HypervisorType == RemoteHypervisor { + ctx = context.WithValue(ctx, customRequestTimeoutKey, remoteRequestTimeout) + } else { + // Check grpc server is serving + if err = k.check(ctx); err != nil { return err } - } - // Setup network interfaces and routes - interfaces, routes, neighs, err := generateVCNetworkStructures(ctx, sandbox.network) - if err != nil { - return err - } - if err = k.updateInterfaces(ctx, interfaces); err != nil { - return err - } - if _, err = k.updateRoutes(ctx, routes); err != nil { - return err - } - if err = k.addARPNeighbors(ctx, neighs); err != nil { - return err + // If a Policy has been specified, send it to the agent. 
+ if len(sandbox.config.AgentConfig.Policy) > 0 { + if err := sandbox.agent.setPolicy(ctx, sandbox.config.AgentConfig.Policy); err != nil { + return err + } + } + + // Setup network interfaces and routes + interfaces, routes, neighs, err := generateVCNetworkStructures(ctx, sandbox.network) + if err != nil { + return err + } + if err = k.updateInterfaces(ctx, interfaces); err != nil { + return err + } + if _, err = k.updateRoutes(ctx, routes); err != nil { + return err + } + if err = k.addARPNeighbors(ctx, neighs); err != nil { + return err + } + + kmodules = setupKernelModules(k.kmodules) } storages := setupStorages(ctx, sandbox) - kmodules := setupKernelModules(k.kmodules) - req := &grpc.CreateSandboxRequest{ Hostname: hostname, Dns: dns, @@ -1194,7 +1207,7 @@ func (k *kataAgent) appendDevices(deviceList []*grpc.Device, c *Container) []*gr kataDevice = k.appendVfioDevice(dev, device, c) } - if kataDevice == nil { + if kataDevice == nil || kataDevice.Type == "" { continue } @@ -2104,7 +2117,12 @@ func (k *kataAgent) getReqContext(ctx context.Context, reqName string) (newCtx c case grpcCheckRequest: newCtx, cancel = context.WithTimeout(ctx, checkRequestTimeout) default: - newCtx, cancel = context.WithTimeout(ctx, defaultRequestTimeout) + var requestTimeout = defaultRequestTimeout + + if timeout, ok := ctx.Value(customRequestTimeoutKey).(time.Duration); ok { + requestTimeout = timeout + } + newCtx, cancel = context.WithTimeout(ctx, requestTimeout) } return newCtx, cancel diff --git a/src/runtime/virtcontainers/pkg/agent/protocols/client/client.go b/src/runtime/virtcontainers/pkg/agent/protocols/client/client.go index b31c86ad84b4..b44ee0d34ef5 100644 --- a/src/runtime/virtcontainers/pkg/agent/protocols/client/client.go +++ b/src/runtime/virtcontainers/pkg/agent/protocols/client/client.go @@ -34,6 +34,7 @@ import ( const ( VSockSocketScheme = "vsock" HybridVSockScheme = "hvsock" + RemoteSockScheme = "remote" MockHybridVSockScheme = "mock" ) @@ -235,6 +236,11 @@ func 
parse(sock string) (string, *url.URL, error) { } hybridVSockPort = uint32(port) grpcAddr = HybridVSockScheme + ":" + hvsocket[0] + case RemoteSockScheme: + if addr.Host != "" { + return "", nil, grpcStatus.Errorf(codes.InvalidArgument, "Invalid remote sock scheme: host address must be empty: %s", sock) + } + grpcAddr = RemoteSockScheme + ":" + addr.Path // just for tests use. case MockHybridVSockScheme: if addr.Path == "" { @@ -255,6 +261,8 @@ func agentDialer(addr *url.URL) dialer { return VsockDialer case HybridVSockScheme: return HybridVSockDialer + case RemoteSockScheme: + return RemoteSockDialer case MockHybridVSockScheme: return MockHybridVSockDialer default: @@ -435,6 +443,31 @@ func HybridVSockDialer(sock string, timeout time.Duration) (net.Conn, error) { return commonDialer(timeout, dialFunc, timeoutErr) } +// RemoteSockDialer dials to an agent in a remote hypervisor sandbox +func RemoteSockDialer(sock string, timeout time.Duration) (net.Conn, error) { + + s := strings.Split(sock, ":") + if !(len(s) == 2 && s[0] == RemoteSockScheme) { + return nil, fmt.Errorf("failed to parse remote sock: %q", sock) + } + socketPath := s[1] + + logrus.Printf("Dialing remote sock: %q %q", socketPath, sock) + + dialFunc := func() (net.Conn, error) { + conn, err := net.Dial("unix", socketPath) + if err != nil { + logrus.Errorf("failed to dial remote sock %q: %v", socketPath, err) + return nil, err + } + return conn, nil + } + + timeoutErr := grpcStatus.Errorf(codes.DeadlineExceeded, "timed out connecting to remote sock: %s", socketPath) + + return commonDialer(timeout, dialFunc, timeoutErr) +} + // just for tests use. 
func MockHybridVSockDialer(sock string, timeout time.Duration) (net.Conn, error) { sock = strings.TrimPrefix(sock, "mock:") diff --git a/src/runtime/virtcontainers/remote.go b/src/runtime/virtcontainers/remote.go new file mode 100644 index 000000000000..edb77cd9537b --- /dev/null +++ b/src/runtime/virtcontainers/remote.go @@ -0,0 +1,293 @@ +// Copyright (c) 2022 IBM Corporation +// SPDX-License-Identifier: Apache-2.0 + +package virtcontainers + +import ( + "context" + "fmt" + "os" + "strconv" + "time" + + cri "github.com/containerd/containerd/pkg/cri/annotations" + persistapi "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors" + pb "github.com/kata-containers/kata-containers/src/runtime/protocols/hypervisor" + hypannotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +const defaultMinTimeout = 60 + +type remoteHypervisor struct { + sandboxID remoteHypervisorSandboxID + agentSocketPath string + config HypervisorConfig +} + +type remoteHypervisorSandboxID string + +type remoteService struct { + conn *grpc.ClientConn + client pb.HypervisorClient +} + +func openRemoteService(socketPath string) (*remoteService, error) { + + conn, err := grpc.Dial(fmt.Sprintf("unix://%s", socketPath), grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + return nil, fmt.Errorf("failed to connect to remote hypervisor socket: %w", err) + } + // conn must stay open: it is owned by the returned remoteService and released by its Close(). + client := pb.NewHypervisorClient(conn) + + s := &remoteService{ + conn: conn, + client: client, + } + + return s, nil +} + +func (s *remoteService) Close() error { + return s.conn.Close() +} + +func (rh *remoteHypervisor) CreateVM(ctx context.Context, id string, network Network, hypervisorConfig *HypervisorConfig) error { + + 
rh.sandboxID = remoteHypervisorSandboxID(id) + + if err := rh.setConfig(hypervisorConfig); err != nil { + return err + } + + s, err := openRemoteService(hypervisorConfig.RemoteHypervisorSocket) + if err != nil { + return err + } + defer s.Close() + + annotations := map[string]string{} + annotations[cri.SandboxName] = hypervisorConfig.SandboxName + annotations[cri.SandboxNamespace] = hypervisorConfig.SandboxNamespace + annotations[hypannotations.MachineType] = hypervisorConfig.HypervisorMachineType + annotations[hypannotations.DefaultVCPUs] = strconv.FormatUint(uint64(hypervisorConfig.NumVCPUs()), 10) + annotations[hypannotations.DefaultMemory] = strconv.FormatUint(uint64(hypervisorConfig.MemorySize), 10) + + req := &pb.CreateVMRequest{ + Id: id, + Annotations: annotations, + NetworkNamespacePath: network.NetworkID(), + } + + res, err := s.client.CreateVM(ctx, req) + if err != nil { + return fmt.Errorf("remote hypervisor call failed: %w", err) + } + + if res.AgentSocketPath == "" { + return errors.New("remote hypervisor does not return tunnel socket path") + } + + rh.agentSocketPath = res.AgentSocketPath + + return nil +} + +func (rh *remoteHypervisor) StartVM(ctx context.Context, timeout int) error { + + minTimeout := defaultMinTimeout + if rh.config.RemoteHypervisorTimeout > 0 { + minTimeout = int(rh.config.RemoteHypervisorTimeout) + } + + if timeout < minTimeout { + timeout = minTimeout + } + + s, err := openRemoteService(rh.config.RemoteHypervisorSocket) + if err != nil { + return err + } + defer s.Close() + + req := &pb.StartVMRequest{ + Id: string(rh.sandboxID), + } + + ctx2, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second) + defer cancel() + + logrus.Printf("calling remote hypervisor StartVM (timeout: %d)", timeout) + + if _, err := s.client.StartVM(ctx2, req); err != nil { + return fmt.Errorf("remote hypervisor call failed: %w", err) + } + + return nil +} + +func (rh *remoteHypervisor) AttestVM(ctx context.Context) 
error { + return nil +} + +func (rh *remoteHypervisor) StopVM(ctx context.Context, waitOnly bool) error { + + s, err := openRemoteService(rh.config.RemoteHypervisorSocket) + if err != nil { + return err + } + defer s.Close() + + req := &pb.StopVMRequest{ + Id: string(rh.sandboxID), + } + + if _, err := s.client.StopVM(ctx, req); err != nil { + return fmt.Errorf("remote hypervisor call failed: %w", err) + } + + return nil +} + +func (rh *remoteHypervisor) GenerateSocket(id string) (interface{}, error) { + + socketPath := rh.agentSocketPath + if len(socketPath) == 0 { + return nil, errors.New("failed to generate remote sock: TunnelSocketPath is not set") + } + + remoteSock := types.RemoteSock{ + SandboxID: id, + TunnelSocketPath: socketPath, + } + + return remoteSock, nil +} + +func notImplemented(name string) error { + + err := errors.Errorf("%s: not implemented", name) + + logrus.Errorf(err.Error()) + + if tracer, ok := err.(interface{ StackTrace() errors.StackTrace }); ok { + for _, f := range tracer.StackTrace() { + logrus.Errorf("%+s:%d\n", f, f) + } + } + + return err +} + +func (rh *remoteHypervisor) PauseVM(ctx context.Context) error { + return notImplemented("PauseVM") +} + +func (rh *remoteHypervisor) SaveVM() error { + return notImplemented("SaveVM") +} + +func (rh *remoteHypervisor) ResumeVM(ctx context.Context) error { + return notImplemented("ResumeVM") +} + +func (rh *remoteHypervisor) AddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) error { + // TODO should we return notImplemented("AddDevice"), rather than nil and ignoring it? 
+ logrus.Printf("addDevice: deviceType=%v devInfo=%#v", devType, devInfo) + return nil +} + +func (rh *remoteHypervisor) HotplugAddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error) { + return nil, notImplemented("HotplugAddDevice") +} + +func (rh *remoteHypervisor) HotplugRemoveDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error) { + return nil, notImplemented("HotplugRemoveDevice") +} + +func (rh *remoteHypervisor) ResizeMemory(ctx context.Context, memMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, MemoryDevice, error) { + return memMB, MemoryDevice{}, nil +} + +func (rh *remoteHypervisor) GetTotalMemoryMB(ctx context.Context) uint32 { + //The remote hypervisor uses the peer pod config to determine the memory of the VM, so we need to use static resource management + logrus.Error("GetTotalMemoryMB - remote hypervisor cannot update resources") + return 0 +} + +func (rh *remoteHypervisor) ResizeVCPUs(ctx context.Context, vcpus uint32) (uint32, uint32, error) { + return vcpus, vcpus, nil +} + +func (rh *remoteHypervisor) GetVMConsole(ctx context.Context, sandboxID string) (string, string, error) { + return "", "", notImplemented("GetVMConsole") +} + +func (rh *remoteHypervisor) Disconnect(ctx context.Context) { + notImplemented("Disconnect") +} + +func (rh *remoteHypervisor) Capabilities(ctx context.Context) types.Capabilities { + var caps types.Capabilities + caps.SetBlockDeviceHotplugSupport() + return caps +} + +func (rh *remoteHypervisor) HypervisorConfig() HypervisorConfig { + return rh.config +} + +func (rh *remoteHypervisor) GetThreadIDs(ctx context.Context) (VcpuThreadIDs, error) { + // Not supported. 
return success + // Just allocating an empty map + return VcpuThreadIDs{}, nil +} + +func (rh *remoteHypervisor) Cleanup(ctx context.Context) error { + return nil +} + +func (rh *remoteHypervisor) setConfig(config *HypervisorConfig) error { + // Create a Validator specific for remote hypervisor + rh.config = *config + + return nil +} + +func (rh *remoteHypervisor) GetPids() []int { + // let's use shim pid as it used by crio to fetch start time + return []int{os.Getpid()} +} + +func (rh *remoteHypervisor) GetVirtioFsPid() *int { + panic(notImplemented("GetVirtioFsPid")) +} + +func (rh *remoteHypervisor) fromGrpc(ctx context.Context, hypervisorConfig *HypervisorConfig, j []byte) error { + panic(notImplemented("fromGrpc")) +} + +func (rh *remoteHypervisor) toGrpc(ctx context.Context) ([]byte, error) { + panic(notImplemented("toGrpc")) +} + +func (rh *remoteHypervisor) Check() error { + return nil +} + +func (rh *remoteHypervisor) Save() persistapi.HypervisorState { + return persistapi.HypervisorState{} +} + +func (rh *remoteHypervisor) Load(persistapi.HypervisorState) { + notImplemented("Load") +} + +func (rh *remoteHypervisor) IsRateLimiterBuiltin() bool { + return false +} diff --git a/src/runtime/virtcontainers/remote_test.go b/src/runtime/virtcontainers/remote_test.go new file mode 100644 index 000000000000..36d52b7aca41 --- /dev/null +++ b/src/runtime/virtcontainers/remote_test.go @@ -0,0 +1,45 @@ +// Copyright (c) 2023 IBM Corporation +// SPDX-License-Identifier: Apache-2.0 + +package virtcontainers + +import ( + "testing" + + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" + "github.com/stretchr/testify/assert" +) + +func newRemoteConfig() HypervisorConfig { + return HypervisorConfig{ + RemoteHypervisorSocket: "/run/peerpod/hypervisor.sock", + RemoteHypervisorTimeout: 600, + DisableGuestSeLinux: true, + EnableAnnotations: []string{}, + } +} + +func TestRemoteHypervisorGenerateSocket(t *testing.T) { + assert := assert.New(t) + + 
remoteHypervisor := remoteHypervisor{ + config: newRemoteConfig(), + } + id := "sandboxId" + + // No socketPath should error + _, err := remoteHypervisor.GenerateSocket(id) + assert.Error(err) + + socketPath := "socketPath" + remoteHypervisor.agentSocketPath = socketPath + + result, err := remoteHypervisor.GenerateSocket(id) + assert.NoError(err) + + expected := types.RemoteSock{ + SandboxID: id, + TunnelSocketPath: socketPath, + } + assert.Equal(result, expected) +} diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 9762467411ae..d921988292da 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -28,6 +28,8 @@ import ( "github.com/sirupsen/logrus" "github.com/vishvananda/netlink" + cri "github.com/containerd/containerd/pkg/cri/annotations" + crio "github.com/containers/podman/v4/pkg/annotations" "github.com/kata-containers/kata-containers/src/runtime/pkg/device/api" "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" "github.com/kata-containers/kata-containers/src/runtime/pkg/device/drivers" @@ -635,6 +637,8 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor } + setHypervisorConfigAnnotations(&sandboxConfig) + coldPlugVFIO, err := s.coldOrHotPlugVFIO(&sandboxConfig) if err != nil { return nil, err @@ -722,6 +726,23 @@ func (s *Sandbox) coldOrHotPlugVFIO(sandboxConfig *SandboxConfig) (bool, error) return coldPlugVFIO, nil } +func setHypervisorConfigAnnotations(sandboxConfig *SandboxConfig) { + if len(sandboxConfig.Containers) > 0 { + // These values are required by remote hypervisor + for _, a := range []string{cri.SandboxName, crio.SandboxName} { + if value, ok := sandboxConfig.Containers[0].Annotations[a]; ok { + sandboxConfig.HypervisorConfig.SandboxName = value + } + } + + for _, a := range []string{cri.SandboxNamespace, crio.Namespace} { + if value, ok := sandboxConfig.Containers[0].Annotations[a]; ok { + 
sandboxConfig.HypervisorConfig.SandboxNamespace = value + } + } + } +} + func (s *Sandbox) createResourceController() error { var err error cgroupPath := "" diff --git a/src/runtime/virtcontainers/stratovirt.go b/src/runtime/virtcontainers/stratovirt.go new file mode 100644 index 000000000000..9bfab6bf44a2 --- /dev/null +++ b/src/runtime/virtcontainers/stratovirt.go @@ -0,0 +1,1306 @@ +//go:build linux + +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package virtcontainers + +import ( + "bufio" + "context" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "regexp" + "strconv" + "strings" + "sync/atomic" + "syscall" + "time" + + "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" + govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu" + hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors" + "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" + "github.com/kata-containers/kata-containers/src/runtime/pkg/uuid" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +// stratovirtTracingTags defines tags for the trace span +var stratovirtTracingTags = map[string]string{ + "source": "runtime", + "package": "virtcontainers", + "subsystem": "hypervisor", + "type": "stratovirt", +} + +// Constants and type definitions related to StratoVirt hypervisor +const ( + stratovirtStopSandboxTimeoutSecs = 15 + defaultStratoVirt = "/usr/bin/stratovirt" + defaultStratoVirtMachineType = "microvm" + apiSocket = "qmp.socket" + debugSocket = "console.socket" + virtiofsSocket = "virtiofs_kata.socket" + nydusdSock = "nydusd_kata.socket" + maxMmioBlkCount = 4 + machineTypeMicrovm = "microvm" + mmioBus VirtioDriver = "mmio" +) + +var defaultKernelParames = []Param{ + 
{"reboot", "k"}, + {"panic", "1"}, + {"net.ifnames", "0"}, + {"random.trust_cpu", "on"}, +} + +var defaultMicroVMParames = []Param{ + {"pci", "off"}, + {"iommu", "off"}, + {"acpi", "off"}, +} + +var ( + blkDriver = map[VirtioDriver]string{ + mmioBus: "virtio-blk-device", + } + netDriver = map[VirtioDriver]string{ + mmioBus: "virtio-net-device", + } + virtiofsDriver = map[VirtioDriver]string{ + mmioBus: "vhost-user-fs-device", + } + vsockDriver = map[VirtioDriver]string{ + mmioBus: "vhost-vsock-device", + } + rngDriver = map[VirtioDriver]string{ + mmioBus: "virtio-rng-device", + } + consoleDriver = map[VirtioDriver]string{ + mmioBus: "virtio-serial-device", + } +) + +// VirtioDev is the StratoVirt device interface. +type VirtioDev interface { + getParams(config *StratovirtConfig) []string +} + +type VirtioDriver string + +type blkDevice struct { + id string + filePath string + driver VirtioDriver + deviceID string +} + +func (b blkDevice) getParams(config *StratovirtConfig) []string { + var params []string + var driveParams []Param + var devParams []Param + + driver := blkDriver[b.driver] + driveParams = append(driveParams, Param{"id", b.id}) + driveParams = append(driveParams, Param{"file", b.filePath}) + driveParams = append(driveParams, Param{"readonly", "on"}) + driveParams = append(driveParams, Param{"direct", "off"}) + + devParams = append(devParams, Param{"drive", b.id}) + devParams = append(devParams, Param{"id", b.deviceID}) + + params = append(params, "-drive", strings.Join(SerializeParams(driveParams, "="), ",")) + params = append(params, "-device", fmt.Sprintf("%s,%s", driver, strings.Join(SerializeParams(devParams, "="), ","))) + return params +} + +type netDevice struct { + devType string + id string + ifname string + driver VirtioDriver + netdev string + deviceID string + FDs []*os.File + mac string +} + +func (n netDevice) getParams(config *StratovirtConfig) []string { + var params []string + var netdevParams []Param + var devParams []Param + + 
driver := netDriver[n.driver] + netdevParams = append(netdevParams, Param{"id", n.id}) + if len(n.FDs) > 0 { + var fdParams []string + + FDs := config.appendFDs(n.FDs) + for _, fd := range FDs { + fdParams = append(fdParams, fmt.Sprintf("%d", fd)) + } + netdevParams = append(netdevParams, Param{"fds", strings.Join(fdParams, ":")}) + } else if n.ifname != "" { + netdevParams = append(netdevParams, Param{"ifname", n.ifname}) + } + + devParams = append(devParams, Param{"netdev", n.id}) + devParams = append(devParams, Param{"id", n.deviceID}) + if n.mac != "" { + devParams = append(devParams, Param{"mac", n.mac}) + } + + params = append(params, "-netdev", fmt.Sprintf("%s,%s", n.devType, strings.Join(SerializeParams(netdevParams, "="), ","))) + params = append(params, "-device", fmt.Sprintf("%s,%s", driver, strings.Join(SerializeParams(devParams, "="), ","))) + return params +} + +type virtioFs struct { + driver VirtioDriver + backend string + charID string + charDev string + tag string + deviceID string +} + +func (v virtioFs) getParams(config *StratovirtConfig) []string { + var params []string + var charParams []Param + var fsParams []Param + + driver := virtiofsDriver[v.driver] + charParams = append(charParams, Param{"id", v.charID}) + charParams = append(charParams, Param{"path", config.fsSockPath}) + + fsParams = append(fsParams, Param{"chardev", v.charDev}) + fsParams = append(fsParams, Param{"tag", v.tag}) + fsParams = append(fsParams, Param{"id", v.deviceID}) + + params = append(params, "-chardev", fmt.Sprintf("%s,%s,server,nowait", v.backend, strings.Join(SerializeParams(charParams, "="), ","))) + params = append(params, "-device", fmt.Sprintf("%s,%s", driver, strings.Join(SerializeParams(fsParams, "="), ","))) + return params +} + +type vhostVsock struct { + driver VirtioDriver + id string + guestID string + VHostFD *os.File +} + +func (v vhostVsock) getParams(config *StratovirtConfig) []string { + var params []string + var devParams []Param + + driver := 
vsockDriver[v.driver] + devParams = append(devParams, Param{"id", v.id}) + devParams = append(devParams, Param{"guest-cid", v.guestID}) + + if v.VHostFD != nil { + FDs := config.appendFDs([]*os.File{v.VHostFD}) + devParams = append(devParams, Param{"vhostfd", fmt.Sprintf("%d", FDs[0])}) + } + + params = append(params, "-device", fmt.Sprintf("%s,%s", driver, strings.Join(SerializeParams(devParams, "="), ","))) + return params +} + +type rngDevice struct { + id string + fileName string + driver VirtioDriver + deviceID string + rng string +} + +func (r rngDevice) getParams(config *StratovirtConfig) []string { + var params []string + var objParams []Param + var devParams []Param + + driver := rngDriver[r.driver] + objParams = append(objParams, Param{"id", r.id}) + objParams = append(objParams, Param{"filename", r.fileName}) + + devParams = append(devParams, Param{"rng", r.rng}) + devParams = append(devParams, Param{"id", r.deviceID}) + + params = append(params, "-object", fmt.Sprintf("rng-random,%s", strings.Join(SerializeParams(objParams, "="), ","))) + params = append(params, "-device", fmt.Sprintf("%s,%s", driver, strings.Join(SerializeParams(devParams, "="), ","))) + return params +} + +type consoleDevice struct { + driver VirtioDriver + id string + backend string + charID string + devType string + charDev string + deviceID string +} + +func (c consoleDevice) getParams(config *StratovirtConfig) []string { + var params []string + var devParams []Param + var charParams []Param + var conParams []Param + + driver := consoleDriver[c.driver] + if c.id != "" { + devParams = append(devParams, Param{"id", c.id}) + } + + conParams = append(conParams, Param{"chardev", c.charDev}) + conParams = append(conParams, Param{"id", c.deviceID}) + params = append(params, "-device", fmt.Sprintf("%s,%s", driver, strings.Join(SerializeParams(devParams, "="), ","))) + + charParams = append(charParams, Param{"id", c.charID}) + charParams = append(charParams, Param{"path", 
config.consolePath}) + params = append(params, "-chardev", fmt.Sprintf("%s,%s,server,nowait", c.backend, strings.Join(SerializeParams(charParams, "="), ","))) + params = append(params, "-device", fmt.Sprintf("%s,%s,nr=0", c.devType, strings.Join(SerializeParams(conParams, "="), ","))) + return params +} + +// StratovirtConfig keeps the custom settings and parameters to start virtual machine. +type StratovirtConfig struct { + name string + uuid string + machineType string + vmPath string + smp uint32 + memory uint64 + kernelPath string + kernelAdditionalParams string + rootfsPath string + initrdPath string + devices []VirtioDev + qmpSocketPath govmmQemu.QMPSocket + consolePath string + fsSockPath string + fds []*os.File +} + +func (config *StratovirtConfig) appendFDs(fds []*os.File) []int { + var fdInts []int + + oldLen := len(config.fds) + + config.fds = append(config.fds, fds...) + + // The magic 3 offset comes from https://golang.org/src/os/exec/exec.go: + // ExtraFiles specifies additional open files to be inherited by the + // new process. It does not include standard input, standard output, or + // standard error. If non-nil, entry i becomes file descriptor 3+i. + // This means that arbitrary file descriptors fd0, fd1... fdN passed in + // the array will be presented to the guest as consecutive descriptors + // 3, 4... N+3. The golang library internally relies on dup2() to do + // the renumbering. + for i := range fds { + fdInts = append(fdInts, oldLen+3+i) + } + + return fdInts +} + +// State keeps StratoVirt device and pids state. 
+type State struct { + mmioBlkSlots [maxMmioBlkCount]bool + pid int + virtiofsPid int +} + +type stratovirt struct { + id string + path string + ctx context.Context + fds []*os.File + config HypervisorConfig + qmpMonitorCh qmpChannel + svConfig StratovirtConfig + state State + stopped atomic.Bool + virtiofsDaemon VirtiofsDaemon +} + +func (s *stratovirt) getKernelParams(machineType string, initrdPath string) (string, error) { + var kernelParams []Param + + if initrdPath == "" { + params, err := GetKernelRootParams(s.config.RootfsType, true, false) + if err != nil { + return "", err + } + kernelParams = params + } + + // Take the default parameters. + kernelParams = append(kernelParams, defaultKernelParames...) + if machineType == "microvm" { + kernelParams = append(kernelParams, defaultMicroVMParames...) + } + + if s.config.Debug { + kernelParams = append(kernelParams, []Param{ + {"debug", ""}, + {"console", "hvc0"}, + }...) + } else { + kernelParams = append(kernelParams, []Param{ + {"quiet", ""}, + {"8250.nr_uarts", "0"}, + {"agent.log_vport", fmt.Sprintf("%d", vSockLogsPort)}, + }...) + } + + kernelParams = append(s.config.KernelParams, kernelParams...) 
+ strParams := SerializeParams(kernelParams, "=") + + return strings.Join(strParams, " "), nil +} + +func (s *stratovirt) createQMPSocket(vmPath string) govmmQemu.QMPSocket { + socketPath := filepath.Join(vmPath, apiSocket) + + s.qmpMonitorCh = qmpChannel{ + ctx: s.ctx, + path: socketPath, + } + + return govmmQemu.QMPSocket{ + Type: "unix", + Name: s.qmpMonitorCh.path, + Server: true, + NoWait: true, + } +} + +// Logger returns a logrus logger appropriate for logging StratoVirt messages +func (s *stratovirt) Logger() *logrus.Entry { + return virtLog.WithField("subsystem", "stratovirt") +} + +func (s *stratovirt) consoleSocketPath(id string) (string, error) { + return utils.BuildSocketPath(s.config.VMStorePath, id, debugSocket) +} + +func (s *stratovirt) virtiofsSocketPath(id string) (string, error) { + return utils.BuildSocketPath(s.config.VMStorePath, id, virtiofsSocket) +} + +func (s *stratovirt) nydusdSocketPath(id string) (string, error) { + return utils.BuildSocketPath(s.config.VMStorePath, id, nydusdSock) +} + +func (s *stratovirt) qmpSetup() error { + s.qmpMonitorCh.Lock() + defer s.qmpMonitorCh.Unlock() + + if s.qmpMonitorCh.qmp != nil { + return nil + } + + events := make(chan govmmQemu.QMPEvent) + go s.loopQMPEvent(events) + + cfg := govmmQemu.QMPConfig{ + Logger: newQMPLogger(), + EventCh: events, + } + + // Auto-closed by QMPStart(). 
+ disconnectCh := make(chan struct{}) + + qmp, _, err := govmmQemu.QMPStart(s.qmpMonitorCh.ctx, s.qmpMonitorCh.path, cfg, disconnectCh) + if err != nil { + s.Logger().WithError(err).Error("Failed to connect to StratoVirt instance") + return err + } + + err = qmp.ExecuteQMPCapabilities(s.qmpMonitorCh.ctx) + if err != nil { + qmp.Shutdown() + s.Logger().WithError(err).Error(qmpCapErrMsg) + return err + } + s.qmpMonitorCh.qmp = qmp + s.qmpMonitorCh.disconn = disconnectCh + + return nil +} + +func (s *stratovirt) loopQMPEvent(event chan govmmQemu.QMPEvent) { + for e := range event { + s.Logger().WithField("event", e).Debug("got QMP event") + } + s.Logger().Infof("QMP event channel closed") +} + +func (s *stratovirt) qmpShutdown() { + s.qmpMonitorCh.Lock() + defer s.qmpMonitorCh.Unlock() + + if s.qmpMonitorCh.qmp != nil { + s.qmpMonitorCh.qmp.Shutdown() + // wait on disconnected channel to be sure that the qmp + // been closed cleanly. + <-s.qmpMonitorCh.disconn + s.qmpMonitorCh.qmp = nil + s.qmpMonitorCh.disconn = nil + } +} + +func (s *stratovirt) createDevices() []VirtioDev { + var devices []VirtioDev + ctx := s.ctx + + // Set random device. + devices = s.appendRng(ctx, devices) + + // Set serial console device for Debug. 
+ if s.config.Debug { + devices = s.appendConsole(ctx, devices) + } + + if s.svConfig.initrdPath == "" { + devices = s.appendBlock(ctx, devices) + if s.svConfig.machineType == machineTypeMicrovm { + s.state.mmioBlkSlots[0] = true + } + } + + return devices +} + +func (s *stratovirt) appendBlock(ctx context.Context, devices []VirtioDev) []VirtioDev { + devices = append(devices, blkDevice{ + id: "rootfs", + filePath: s.svConfig.rootfsPath, + deviceID: "virtio-blk0", + driver: mmioBus, + }) + + return devices +} + +func (s *stratovirt) appendRng(ctx context.Context, devices []VirtioDev) []VirtioDev { + devices = append(devices, rngDevice{ + id: "objrng0", + fileName: s.config.EntropySource, + rng: "objrng0", + deviceID: "virtio-rng0", + driver: mmioBus, + }) + + return devices +} + +func (s *stratovirt) appendConsole(ctx context.Context, devices []VirtioDev) []VirtioDev { + devices = append(devices, consoleDevice{ + id: "virtio-serial0", + backend: "socket", + charID: "charconsole0", + devType: "virtconsole", + charDev: "charconsole0", + deviceID: "virtio-console0", + driver: mmioBus, + }) + + return devices +} + +func (s *stratovirt) appendVhostVsock(ctx context.Context, devices []VirtioDev, vsock types.VSock) []VirtioDev { + devices = append(devices, vhostVsock{ + id: "vsock-id", + guestID: fmt.Sprintf("%d", vsock.ContextID), + VHostFD: vsock.VhostFd, + driver: mmioBus, + }) + + return devices +} + +func (s *stratovirt) appendNetwork(ctx context.Context, devices []VirtioDev, endpoint Endpoint) []VirtioDev { + name := endpoint.Name() + + devices = append(devices, netDevice{ + devType: "tap", + id: name, + ifname: endpoint.NetworkPair().TapInterface.TAPIface.Name, + netdev: name, + deviceID: name, + FDs: endpoint.NetworkPair().TapInterface.VMFds, + mac: endpoint.HardwareAddr(), + driver: mmioBus, + }) + + return devices +} + +func (s *stratovirt) appendVirtioFs(ctx context.Context, devices []VirtioDev, volume types.Volume) []VirtioDev { + if s.config.SharedFS != 
config.VirtioFS && s.config.SharedFS != config.VirtioFSNydus { + return devices + } + name := "virtio_fs" + + devices = append(devices, virtioFs{ + backend: "socket", + // Virtio-fs must be bound to unique charDev, it uses the same name. + charID: name, + charDev: name, + tag: volume.MountTag, + deviceID: "virtio-fs0", + driver: mmioBus, + }) + + return devices +} + +func (s *stratovirt) setVMConfig(id string, hypervisorConfig *HypervisorConfig) error { + span, _ := katatrace.Trace(s.ctx, s.Logger(), "setStratoVirtUp", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) + defer span.End() + + if err := validateHypervisorConfig(hypervisorConfig); err != nil { + return err + } + + s.id = id + if err := s.setConfig(hypervisorConfig); err != nil { + return err + } + + machineType := strings.ToLower(s.config.HypervisorMachineType) + if machineType == "" { + machineType = defaultStratoVirtMachineType + } + + initrdPath, err := s.config.InitrdAssetPath() + if err != nil { + return err + } + + imagePath, err := s.config.ImageAssetPath() + if err != nil { + return err + } + + kernelPath, err := s.config.KernelAssetPath() + if err != nil { + return err + } + + kernelParams, err := s.getKernelParams(machineType, initrdPath) + if err != nil { + return err + } + + vmPath := filepath.Join(s.config.VMStorePath, s.id) + qmpSocket := s.createQMPSocket(vmPath) + + s.svConfig = StratovirtConfig{ + name: fmt.Sprintf("sandbox-%s", id), + uuid: uuid.Generate().String(), + machineType: machineType, + vmPath: vmPath, + smp: s.config.NumVCPUs(), + memory: uint64(s.config.MemorySize), + kernelPath: kernelPath, + kernelAdditionalParams: kernelParams, + rootfsPath: imagePath, + initrdPath: initrdPath, + qmpSocketPath: qmpSocket, + consolePath: filepath.Join(vmPath, debugSocket), + fsSockPath: filepath.Join(vmPath, virtiofsSocket), + } + + s.svConfig.devices = s.createDevices() + + return nil +} + +func (s *stratovirt) setupVirtiofsDaemon(ctx context.Context) (err error) { + if 
s.config.SharedFS == config.NoSharedFS { + return nil + } + + if s.virtiofsDaemon == nil { + return errors.New("No stratovirt virtiofsDaemon configuration") + } + + s.Logger().Info("Starting virtiofsDaemon") + + pid, err := s.virtiofsDaemon.Start(ctx, func() { + s.StopVM(ctx, false) + }) + if err != nil { + return err + } + s.state.virtiofsPid = pid + + return nil +} + +func (s *stratovirt) stopVirtiofsDaemon(ctx context.Context) (err error) { + if s.state.virtiofsPid == 0 { + s.Logger().Warn("The virtiofsd had stopped") + return nil + } + + err = s.virtiofsDaemon.Stop(ctx) + if err != nil { + return err + } + + s.state.virtiofsPid = 0 + + return nil +} + +// Get StratoVirt binary path. +func (s *stratovirt) binPath() (string, error) { + path, err := s.config.HypervisorAssetPath() + if err != nil { + return "", err + } + + if path == "" { + path = defaultStratoVirt + } + + if _, err = os.Stat(path); os.IsNotExist(err) { + return "", fmt.Errorf("StratoVirt path (%s) does not exist", path) + } + return path, nil +} + +// Log StratoVirt errors and ensure the StratoVirt process is reaped after +// termination +func (s *stratovirt) logAndWait(stratovirtCmd *exec.Cmd, reader io.ReadCloser) { + s.state.pid = stratovirtCmd.Process.Pid + s.Logger().Infof("Start logging StratoVirt (Pid=%d)", s.state.pid) + scanner := bufio.NewScanner(reader) + infoRE := regexp.MustCompile("([^:]):INFO: ") + warnRE := regexp.MustCompile("([^:]):WARN: ") + for scanner.Scan() { + text := scanner.Text() + if infoRE.MatchString(text) { + text = infoRE.ReplaceAllString(text, "$1") + s.Logger().WithField("StratoVirt Pid", s.state.pid).Info(text) + } else if warnRE.MatchString(text) { + text = infoRE.ReplaceAllString(text, "$1") + s.Logger().WithField("StratoVirt Pid", s.state.pid).Warn(text) + } else { + s.Logger().WithField("StratoVirt Pid", s.state.pid).Error(text) + } + } + s.Logger().Infof("Stop logging StratoVirt (Pid=%d)", s.state.pid) + stratovirtCmd.Wait() +} + +// waitVM will wait for the 
Sandbox's VM to be up and running. +func (s *stratovirt) waitVM(ctx context.Context, timeout int) error { + span, _ := katatrace.Trace(ctx, s.Logger(), "waitVM", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) + defer span.End() + + if timeout < 0 { + return fmt.Errorf("Invalid timeout %ds", timeout) + } + + cfg := govmmQemu.QMPConfig{Logger: newQMPLogger()} + + var qmp *govmmQemu.QMP + var disconnectCh chan struct{} + var ver *govmmQemu.QMPVersion + var err error + + // clear andy possible old state before trying to connect again. + s.qmpShutdown() + timeStart := time.Now() + for { + disconnectCh = make(chan struct{}) + qmp, ver, err = govmmQemu.QMPStart(s.qmpMonitorCh.ctx, s.qmpMonitorCh.path, cfg, disconnectCh) + if err == nil { + break + } + + if int(time.Since(timeStart).Seconds()) > timeout { + return fmt.Errorf("Failed to connect StratoVirt instance (timeout %ds): %v", timeout, err) + } + + time.Sleep(time.Duration(50) * time.Millisecond) + } + s.qmpMonitorCh.qmp = qmp + s.qmpMonitorCh.disconn = disconnectCh + defer s.qmpShutdown() + + s.Logger().WithFields(logrus.Fields{ + "qmp-major-version": ver.Major, + "qmp-minor-version": ver.Minor, + "qmp-micro-version": ver.Micro, + "qmp-Capabilities": strings.Join(ver.Capabilities, ","), + }).Infof("QMP details") + + if err = s.qmpMonitorCh.qmp.ExecuteQMPCapabilities(s.qmpMonitorCh.ctx); err != nil { + s.Logger().WithError(err).Error(qmpCapErrMsg) + return err + } + + return nil +} + +func (s *stratovirt) createParams(params *[]string) { + *params = append(*params, "-name", s.svConfig.name) + *params = append(*params, "-uuid", s.svConfig.uuid) + *params = append(*params, "-smp", strconv.Itoa(int(s.svConfig.smp))) + *params = append(*params, "-m", strconv.Itoa(int(s.svConfig.memory))) + *params = append(*params, "-kernel", s.svConfig.kernelPath) + *params = append(*params, "-append", s.svConfig.kernelAdditionalParams) + *params = append(*params, "-qmp", fmt.Sprintf("%s:%s,server,nowait", 
s.svConfig.qmpSocketPath.Type, s.svConfig.qmpSocketPath.Name)) + *params = append(*params, "-D") + *params = append(*params, "-disable-seccomp") + + if s.config.SharedFS == config.VirtioFS || s.config.SharedFS == config.VirtioFSNydus { + *params = append(*params, "-machine", fmt.Sprintf("type=%s,dump-guest-core=off,mem-share=on", s.svConfig.machineType)) + } else { + *params = append(*params, "-machine", fmt.Sprintf("type=%s,dump-guest-core=off", s.svConfig.machineType)) + } + + if s.svConfig.initrdPath != "" { + *params = append(*params, "-initrd", s.svConfig.initrdPath) + } + + for _, d := range s.svConfig.devices { + *params = append(*params, d.getParams(&s.svConfig)...) + } +} + +// cleanupVM will remove generated files and directories related with VM. +func (s *stratovirt) cleanupVM(force bool) error { + link, err := filepath.EvalSymlinks(s.svConfig.vmPath) + if err != nil { + s.Logger().WithError(err).Warn("Failed to get evaluation of any symbolic links.") + } + + s.Logger().WithFields(logrus.Fields{ + "link": link, + "dir": s.svConfig.vmPath, + }).Infof("cleanup vm path") + + if err := os.RemoveAll(s.svConfig.vmPath); err != nil { + if !force { + return err + } + s.Logger().WithError(err).Warnf("Failed to clean up vm dir %s", s.svConfig.vmPath) + } + + if link != s.svConfig.vmPath && link != "" { + if errRemove := os.RemoveAll(link); errRemove != nil { + if !force { + return err + } + s.Logger().WithError(errRemove).WithField("link", link).Warnf("Failed to remove vm path link %s", link) + } + } + + if s.config.VMid != "" { + dir := filepath.Join(s.config.VMStorePath, s.config.VMid) + if err := os.RemoveAll(dir); err != nil { + if !force { + return err + } + s.Logger().WithError(err).WithField("path", dir).Warn("failed to remove vm path") + } + } + + return nil +} + +func (s *stratovirt) setupMmioSlot(Name string, isPut bool) (int, error) { + Name = filepath.Base(strings.ToLower(Name)) + + if strings.HasPrefix(Name, "vd") { + charStr := 
strings.TrimPrefix(Name, "vd") + if charStr == Name { + return 0, fmt.Errorf("Could not parse idx from Name %q", Name) + } + + char := []rune(charStr) + idx := int(char[0] - 'a') + + if !isPut && s.state.mmioBlkSlots[idx] { + return 0, fmt.Errorf("failed to setup mmio slot, slot is being used %q", charStr) + } + s.state.mmioBlkSlots[idx] = !isPut + + return idx, nil + } + + return 0, fmt.Errorf("failed to setup mmio slot, Name is invalid %q", Name) +} + +func (s *stratovirt) getDevSlot(Name string) (int, error) { + slot, err := s.setupMmioSlot(Name, false) + if err != nil { + return 0, err + } + + return slot, nil +} + +func (s *stratovirt) delDevSlot(Name string) error { + if _, err := s.setupMmioSlot(Name, true); err != nil { + return err + } + + return nil +} + +func (s *stratovirt) hotplugBlk(ctx context.Context, drive *config.BlockDrive, op Operation) error { + err := s.qmpSetup() + if err != nil { + return err + } + + driver := "virtio-blk-mmio" + + defer func() { + if err != nil { + s.qmpMonitorCh.qmp.ExecuteBlockdevDel(s.qmpMonitorCh.ctx, drive.ID) + if errDel := s.delDevSlot(drive.VirtPath); errDel != nil { + s.Logger().WithError(errDel).Warn("Failed to delete device slot.") + } + } + }() + + switch op { + case AddDevice: + sblkDevice := govmmQemu.BlockDevice{ + ID: drive.ID, + File: drive.File, + ReadOnly: drive.ReadOnly, + AIO: govmmQemu.BlockDeviceAIO("native"), + } + if err := s.qmpMonitorCh.qmp.ExecuteBlockdevAdd(s.qmpMonitorCh.ctx, &sblkDevice); err != nil { + return err + } + + slot, err := s.getDevSlot(drive.VirtPath) + if err != nil { + return err + } + + devAddr := fmt.Sprintf("%d", slot) + if err := s.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(s.qmpMonitorCh.ctx, drive.ID, drive.ID, driver, devAddr, "", "", 0, false, false); err != nil { + return err + } + case RemoveDevice: + if errDel := s.delDevSlot(drive.VirtPath); errDel != nil { + s.Logger().WithError(errDel).Warn("Failed to delete device slot.") + } + if err := 
s.qmpMonitorCh.qmp.ExecuteDeviceDel(s.qmpMonitorCh.ctx, drive.ID); err != nil { + return err + } + + default: + return fmt.Errorf("operation is not supported %d", op) + } + + return nil +} + +func (s *stratovirt) createVirtiofsDaemon(sharedPath string) (VirtiofsDaemon, error) { + virtiofsdSocketPath, err := s.virtiofsSocketPath(s.id) + if err != nil { + return nil, err + } + + if s.config.SharedFS == config.VirtioFSNydus { + apiSockPath, err := s.nydusdSocketPath(s.id) + if err != nil { + return nil, err + } + nd := &nydusd{ + path: s.config.VirtioFSDaemon, + sockPath: virtiofsdSocketPath, + apiSockPath: apiSockPath, + sourcePath: sharedPath, + debug: s.config.Debug, + extraArgs: s.config.VirtioFSExtraArgs, + startFn: startInShimNS, + } + nd.setupShareDirFn = nd.setupPassthroughFS + return nd, nil + } + + // default use virtiofsd + return &virtiofsd{ + path: s.config.VirtioFSDaemon, + sourcePath: sharedPath, + socketPath: virtiofsdSocketPath, + extraArgs: s.config.VirtioFSExtraArgs, + cache: s.config.VirtioFSCache, + }, nil +} + +func (s *stratovirt) CreateVM(ctx context.Context, id string, network Network, hypervisorConfig *HypervisorConfig) error { + span, _ := katatrace.Trace(ctx, s.Logger(), "CreateVM", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) + defer span.End() + + s.ctx = ctx + err := s.setVMConfig(id, hypervisorConfig) + if err != nil { + return err + } + + if s.path, err = s.binPath(); err != nil { + return err + } + + s.virtiofsDaemon, err = s.createVirtiofsDaemon(hypervisorConfig.SharedPath) + if err != nil { + return err + } + + return nil +} + +func launchStratovirt(ctx context.Context, s *stratovirt) (*exec.Cmd, io.ReadCloser, error) { + var params []string + s.createParams(¶ms) + + cmd := exec.CommandContext(ctx, s.path, params...) 
+ + if len(s.fds) > 0 { + s.Logger().Infof("Adding extra file %v", s.fds) + cmd.ExtraFiles = s.fds + } + + if s.config.Debug { + cmd.Env = []string{"STRATOVIRT_LOG_LEVEL=info"} + } + + reader, err := cmd.StdoutPipe() + if err != nil { + s.Logger().Error("Unable to connect stdout to a pipe") + return nil, nil, err + } + s.Logger().Infof("launching %s with: %v", s.path, params) + + if err := cmd.Start(); err != nil { + s.Logger().Error("Error starting hypervisor, please check the params") + return nil, nil, err + } + + return cmd, reader, nil +} + +func (s *stratovirt) StartVM(ctx context.Context, timeout int) error { + span, _ := katatrace.Trace(ctx, s.Logger(), "StartVM", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) + defer span.End() + + err := utils.MkdirAllWithInheritedOwner(s.svConfig.vmPath, DirMode) + if err != nil { + return err + } + + defer func() { + if err != nil { + if s.state.virtiofsPid != 0 { + syscall.Kill(s.state.virtiofsPid, syscall.SIGILL) + } + } + for _, fd := range s.fds { + if err := fd.Close(); err != nil { + s.Logger().WithError(err).Error("After launching StratoVirt") + } + } + s.fds = []*os.File{} + }() + + if err = s.setupVirtiofsDaemon(ctx); err != nil { + return err + } + defer func() { + if err != nil { + if shutdownErr := s.stopVirtiofsDaemon(ctx); shutdownErr != nil { + s.Logger().WithError(shutdownErr).Warn("Error shutting down the VirtiofsDaemon") + } + } + }() + + stratovirtCmd, reader, err := launchStratovirt(ctx, s) + if err != nil { + s.Logger().WithError(err).Error("failed to launch StratoVirt") + return fmt.Errorf("failed to launch StratoVirt: %s", err) + } + + go s.logAndWait(stratovirtCmd, reader) + + if err = s.waitVM(s.ctx, timeout); err != nil { + return err + } + + return nil +} + +func (s *stratovirt) StopVM(ctx context.Context, waitOnly bool) (err error) { + span, _ := katatrace.Trace(ctx, s.Logger(), "StopVM", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) + defer span.End() + + 
s.Logger().Info("Stopping Sandbox") + if s.stopped.Load() { + s.Logger().Info("Already stopped") + return nil + } + + defer func() { + s.cleanupVM(true) + if err == nil { + s.stopped.Store(true) + } + }() + + if err := s.qmpSetup(); err != nil { + return err + } + + pids := s.GetPids() + if len(pids) == 0 { + return errors.New("cannot determine StratoVirt PID") + } + pid := pids[0] + + if waitOnly { + err := utils.WaitLocalProcess(pid, stratovirtStopSandboxTimeoutSecs, syscall.Signal(0), s.Logger()) + if err != nil { + return err + } + } else { + err = syscall.Kill(pid, syscall.SIGKILL) + if err != nil { + s.Logger().WithError(err).Error("Failed to send SIGKILL to stratovirt") + return err + } + } + + if s.config.SharedFS == config.VirtioFS || s.config.SharedFS == config.VirtioFSNydus { + if err := s.stopVirtiofsDaemon(ctx); err != nil { + return err + } + } + + return nil +} + +func (s *stratovirt) PauseVM(ctx context.Context) error { + return nil +} + +func (s *stratovirt) SaveVM() error { + return nil +} + +func (s *stratovirt) ResumeVM(ctx context.Context) error { + return nil +} + +func (s *stratovirt) AddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) error { + span, _ := katatrace.Trace(ctx, s.Logger(), "AddDevice", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) + defer span.End() + + switch v := devInfo.(type) { + case types.Socket: + s.svConfig.devices = s.appendConsole(ctx, s.svConfig.devices) + case types.VSock: + s.fds = append(s.fds, v.VhostFd) + s.svConfig.devices = s.appendVhostVsock(ctx, s.svConfig.devices, v) + case Endpoint: + s.fds = append(s.fds, v.NetworkPair().TapInterface.VMFds...) 
+ s.svConfig.devices = s.appendNetwork(ctx, s.svConfig.devices, v) + case config.BlockDrive: + s.svConfig.devices = s.appendBlock(ctx, s.svConfig.devices) + case types.Volume: + s.svConfig.devices = s.appendVirtioFs(ctx, s.svConfig.devices, v) + default: + s.Logger().WithField("dev-type", v).Warn("Could not append device: unsupported device type") + } + + return nil +} + +func (s *stratovirt) HotplugAddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error) { + span, _ := katatrace.Trace(ctx, s.Logger(), "HotplugAddDevice", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) + defer span.End() + + switch devType { + case BlockDev: + return nil, s.hotplugBlk(ctx, devInfo.(*config.BlockDrive), AddDevice) + default: + return nil, fmt.Errorf("Hotplug add device: unsupported device type '%v'", devType) + } +} + +func (s *stratovirt) HotplugRemoveDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error) { + span, _ := katatrace.Trace(ctx, s.Logger(), "HotplugRemoveDevice", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) + defer span.End() + + switch devType { + case BlockDev: + return nil, s.hotplugBlk(ctx, devInfo.(*config.BlockDrive), RemoveDevice) + default: + return nil, fmt.Errorf("Hotplug remove device: unsupported device type '%v'", devType) + } +} + +func (s *stratovirt) ResizeMemory(ctx context.Context, reqMemMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, MemoryDevice, error) { + return 0, MemoryDevice{}, nil +} + +func (s *stratovirt) ResizeVCPUs(ctx context.Context, reqVCPUs uint32) (currentVCPUs uint32, newVCPUs uint32, err error) { + return 0, 0, nil +} + +func (s *stratovirt) GetVMConsole(ctx context.Context, id string) (string, string, error) { + span, _ := katatrace.Trace(ctx, s.Logger(), "GetVMConsole", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) + defer span.End() + + consoleURL, err := s.consoleSocketPath(s.id) + if err != nil { + 
return consoleProtoUnix, "", err + } + + return consoleProtoUnix, consoleURL, nil +} + +func (s *stratovirt) Disconnect(ctx context.Context) { + span, _ := katatrace.Trace(ctx, s.Logger(), "Disconnect", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) + defer span.End() + + s.qmpShutdown() +} + +func (s *stratovirt) Capabilities(ctx context.Context) types.Capabilities { + span, _ := katatrace.Trace(ctx, s.Logger(), "Capabilities", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) + defer span.End() + var caps types.Capabilities + caps.SetBlockDeviceHotplugSupport() + if s.config.SharedFS != config.NoSharedFS { + caps.SetFsSharingSupport() + } + + return caps +} + +func (s *stratovirt) HypervisorConfig() HypervisorConfig { + return s.config +} + +func (s *stratovirt) GetTotalMemoryMB(ctx context.Context) uint32 { + return s.config.MemorySize +} + +func (s *stratovirt) GetThreadIDs(ctx context.Context) (VcpuThreadIDs, error) { + span, _ := katatrace.Trace(ctx, s.Logger(), "GetThreadIDs", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) + defer span.End() + + tid := VcpuThreadIDs{} + if err := s.qmpSetup(); err != nil { + return tid, err + } + + cpuInfos, err := s.qmpMonitorCh.qmp.ExecQueryCpus(s.qmpMonitorCh.ctx) + if err != nil { + s.Logger().WithError(err).Error("failed to query cpu infos") + return tid, err + } + + tid.vcpus = make(map[int]int, len(cpuInfos)) + for _, i := range cpuInfos { + if i.ThreadID > 0 { + tid.vcpus[i.CPU] = i.ThreadID + } + } + return tid, nil +} + +func (s *stratovirt) Cleanup(ctx context.Context) error { + span, _ := katatrace.Trace(ctx, s.Logger(), "Cleanup", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) + defer span.End() + + for _, fd := range s.fds { + if err := fd.Close(); err != nil { + s.Logger().WithError(err).Warn("failed closing fd") + } + } + s.fds = []*os.File{} + + return nil +} + +func (s *stratovirt) setConfig(config *HypervisorConfig) error { + s.config = *config + + 
return nil +} + +func (s *stratovirt) GetPids() []int { + var pids []int + pids = append(pids, s.state.pid) + + return pids +} + +func (s *stratovirt) GetVirtioFsPid() *int { + return &s.state.virtiofsPid +} + +func (s *stratovirt) fromGrpc(ctx context.Context, hypervisorConfig *HypervisorConfig, j []byte) error { + return errors.New("StratoVirt is not supported by VM cache") +} + +func (s *stratovirt) toGrpc(ctx context.Context) ([]byte, error) { + return nil, errors.New("StratoVirt is not supported by VM cache") +} + +func (s *stratovirt) Check() error { + if s.stopped.Load() { + return fmt.Errorf("StratoVirt is not running") + } + + if err := s.qmpSetup(); err != nil { + return err + } + + return nil +} + +func (s *stratovirt) Save() (hs hv.HypervisorState) { + pids := s.GetPids() + hs.Pid = pids[0] + hs.VirtiofsDaemonPid = s.state.virtiofsPid + hs.Type = string(StratovirtHypervisor) + return +} + +func (s *stratovirt) Load(hs hv.HypervisorState) { + s.state.pid = hs.Pid + s.state.virtiofsPid = hs.VirtiofsDaemonPid +} + +func (s *stratovirt) GenerateSocket(id string) (interface{}, error) { + return generateVMSocket(id, s.config.VMStorePath) +} + +func (s *stratovirt) IsRateLimiterBuiltin() bool { + return false +} diff --git a/src/runtime/virtcontainers/stratovirt_test.go b/src/runtime/virtcontainers/stratovirt_test.go new file mode 100644 index 000000000000..3b41eaf18ac3 --- /dev/null +++ b/src/runtime/virtcontainers/stratovirt_test.go @@ -0,0 +1,432 @@ +//go:build linux + +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. 
+// +// SPDX-License-Identifier: Apache-2.0 +// + +package virtcontainers + +import ( + "context" + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/persist" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" + "github.com/pkg/errors" + "github.com/stretchr/testify/assert" +) + +func newStratovirtConfig() (HypervisorConfig, error) { + + setupStratovirt() + + if testStratovirtPath == "" { + return HypervisorConfig{}, errors.New("hypervisor fake path is empty") + } + + if testVirtiofsdPath == "" { + return HypervisorConfig{}, errors.New("virtiofsd fake path is empty") + } + + if _, err := os.Stat(testStratovirtPath); os.IsNotExist(err) { + return HypervisorConfig{}, err + } + + if _, err := os.Stat(testVirtiofsdPath); os.IsNotExist(err) { + return HypervisorConfig{}, err + } + + return HypervisorConfig{ + HypervisorPath: testStratovirtPath, + KernelPath: testStratovirtKernelPath, + InitrdPath: testStratovirtInitrdPath, + RootfsType: string(EXT4), + NumVCPUsF: defaultVCPUs, + BlockDeviceDriver: config.VirtioBlock, + MemorySize: defaultMemSzMiB, + DefaultMaxVCPUs: uint32(64), + SharedFS: config.VirtioFS, + VirtioFSCache: typeVirtioFSCacheModeAlways, + VirtioFSDaemon: testVirtiofsdPath, + }, nil +} + +func TestStratovirtCreateVM(t *testing.T) { + assert := assert.New(t) + + store, err := persist.GetDriver() + assert.NoError(err) + + network, err := NewNetwork() + assert.NoError(err) + + sv := stratovirt{ + config: HypervisorConfig{ + VMStorePath: store.RunVMStoragePath(), + RunStorePath: store.RunStoragePath(), + }, + } + + config0, err := newStratovirtConfig() + assert.NoError(err) + + config1, err := newStratovirtConfig() + assert.NoError(err) + config1.ImagePath = testStratovirtImagePath + config1.InitrdPath = "" + + config2, err := newStratovirtConfig() + assert.NoError(err) + config2.Debug = 
true + + config3, err := newStratovirtConfig() + assert.NoError(err) + config3.SharedFS = config.VirtioFS + + config4, err := newStratovirtConfig() + assert.NoError(err) + config4.SharedFS = config.VirtioFSNydus + + type testData struct { + config HypervisorConfig + expectError bool + configMatch bool + } + + data := []testData{ + {config0, false, true}, + {config1, false, true}, + {config2, false, true}, + {config3, false, true}, + {config4, false, true}, + } + + for i, d := range data { + msg := fmt.Sprintf("test[%d]", i) + + err = sv.CreateVM(context.Background(), "testSandbox", network, &d.config) + + if d.expectError { + assert.Error(err, msg) + continue + } + + assert.NoError(err, msg) + + if d.configMatch { + assert.Exactly(d.config, sv.config, msg) + } + } +} + +func TestStratovirtStartSandbox(t *testing.T) { + assert := assert.New(t) + sConfig, err := newStratovirtConfig() + assert.NoError(err) + sConfig.Debug = true + + network, err := NewNetwork() + assert.NoError(err) + + store, err := persist.GetDriver() + assert.NoError(err) + + sConfig.VMStorePath = store.RunVMStoragePath() + sConfig.RunStorePath = store.RunStoragePath() + + sv := &stratovirt{ + config: sConfig, + virtiofsDaemon: &virtiofsdMock{}, + } + + assert.Exactly(sv.stopped.Load(), false) + + err = sv.CreateVM(context.Background(), "testSandbox", network, &sConfig) + assert.NoError(err) + + mem := sv.GetTotalMemoryMB(context.Background()) + assert.True(mem > 0) + + err = sv.StartVM(context.Background(), 10) + assert.Error(err) +} + +func TestStratovirtCleanupVM(t *testing.T) { + assert := assert.New(t) + store, err := persist.GetDriver() + assert.NoError(err, "persist.GetDriver() unexpected error") + + sv := &stratovirt{ + id: "cleanVM", + config: HypervisorConfig{ + VMStorePath: store.RunVMStoragePath(), + RunStorePath: store.RunStoragePath(), + }, + } + sv.svConfig.vmPath = filepath.Join(sv.config.VMStorePath, sv.id) + sv.config.VMid = "cleanVM" + + err = sv.cleanupVM(true) + 
assert.NoError(err, "persist.GetDriver() unexpected error") + + dir := filepath.Join(store.RunVMStoragePath(), sv.id) + os.MkdirAll(dir, os.ModePerm) + + err = sv.cleanupVM(false) + assert.NoError(err, "persist.GetDriver() unexpected error") + + _, err = os.Stat(dir) + assert.Error(err, "dir should not exist %s", dir) + + assert.True(os.IsNotExist(err), "persist.GetDriver() unexpected error") +} + +func TestStratovirtAddFsDevice(t *testing.T) { + assert := assert.New(t) + sConfig, err := newStratovirtConfig() + assert.NoError(err) + sConfig.SharedFS = config.VirtioFS + mountTag := "testMountTag" + + sv := &stratovirt{ + ctx: context.Background(), + config: sConfig, + } + volume := types.Volume{ + MountTag: mountTag, + } + expected := []VirtioDev{ + virtioFs{ + backend: "socket", + charID: "virtio_fs", + charDev: "virtio_fs", + tag: volume.MountTag, + deviceID: "virtio-fs0", + driver: mmioBus, + }, + } + + err = sv.AddDevice(context.Background(), volume, FsDev) + assert.NoError(err) + assert.Exactly(sv.svConfig.devices, expected) +} + +func TestStratovirtAddBlockDevice(t *testing.T) { + assert := assert.New(t) + sConfig, err := newStratovirtConfig() + assert.NoError(err) + + sv := &stratovirt{ + ctx: context.Background(), + config: sConfig, + } + blockDrive := config.BlockDrive{} + expected := []VirtioDev{ + blkDevice{ + id: "rootfs", + filePath: sv.svConfig.rootfsPath, + deviceID: "virtio-blk0", + driver: mmioBus, + }, + } + + err = sv.AddDevice(context.Background(), blockDrive, BlockDev) + assert.NoError(err) + assert.Exactly(sv.svConfig.devices, expected) +} + +func TestStratovirtAddVsockDevice(t *testing.T) { + assert := assert.New(t) + sConfig, err := newStratovirtConfig() + assert.NoError(err) + + dir := t.TempDir() + vsockFilename := filepath.Join(dir, "vsock") + contextID := uint64(3) + port := uint32(1024) + vsockFile, fileErr := os.Create(vsockFilename) + assert.NoError(fileErr) + defer vsockFile.Close() + + sv := &stratovirt{ + ctx: context.Background(), 
+ config: sConfig, + } + vsock := types.VSock{ + ContextID: contextID, + Port: port, + VhostFd: vsockFile, + } + expected := []VirtioDev{ + vhostVsock{ + id: "vsock-id", + guestID: fmt.Sprintf("%d", contextID), + VHostFD: vsockFile, + driver: mmioBus, + }, + } + + err = sv.AddDevice(context.Background(), vsock, VSockPCIDev) + assert.NoError(err) + assert.Exactly(sv.svConfig.devices, expected) +} + +func TestStratovirtAddConsole(t *testing.T) { + assert := assert.New(t) + sConfig, err := newStratovirtConfig() + assert.NoError(err) + + sv := &stratovirt{ + ctx: context.Background(), + config: sConfig, + } + sock := types.Socket{} + expected := []VirtioDev{ + consoleDevice{ + id: "virtio-serial0", + backend: "socket", + charID: "charconsole0", + devType: "virtconsole", + charDev: "charconsole0", + deviceID: "virtio-console0", + driver: mmioBus, + }, + } + + err = sv.AddDevice(context.Background(), sock, SerialPortDev) + assert.NoError(err) + assert.Exactly(sv.svConfig.devices, expected) +} + +func TestStratovirtGetSandboxConsole(t *testing.T) { + assert := assert.New(t) + store, err := persist.GetDriver() + assert.NoError(err) + + sandboxID := "testSandboxID" + sv := &stratovirt{ + id: sandboxID, + ctx: context.Background(), + config: HypervisorConfig{ + VMStorePath: store.RunVMStoragePath(), + RunStorePath: store.RunStoragePath(), + }, + } + expected := filepath.Join(store.RunVMStoragePath(), sandboxID, debugSocket) + + proto, result, err := sv.GetVMConsole(sv.ctx, sandboxID) + assert.NoError(err) + assert.Equal(result, expected) + assert.Equal(proto, consoleProtoUnix) +} + +func TestStratovirtCapabilities(t *testing.T) { + assert := assert.New(t) + + sConfig, err := newStratovirtConfig() + assert.NoError(err) + + sv := stratovirt{} + assert.Equal(sv.config, HypervisorConfig{}) + + sConfig.SharedFS = config.VirtioFS + + err = sv.setConfig(&sConfig) + assert.NoError(err) + + var ctx context.Context + c := sv.Capabilities(ctx) + assert.True(c.IsFsSharingSupported()) + 
+ sConfig.SharedFS = config.NoSharedFS + + err = sv.setConfig(&sConfig) + assert.NoError(err) + + c = sv.Capabilities(ctx) + assert.False(c.IsFsSharingSupported()) +} + +func TestStratovirtSetConfig(t *testing.T) { + assert := assert.New(t) + + config, err := newStratovirtConfig() + assert.NoError(err) + + sv := stratovirt{} + assert.Equal(sv.config, HypervisorConfig{}) + + err = sv.setConfig(&config) + assert.NoError(err) + + assert.Equal(sv.config, config) +} + +func TestStratovirtCleanup(t *testing.T) { + assert := assert.New(t) + sConfig, err := newStratovirtConfig() + assert.NoError(err) + + sv := &stratovirt{ + ctx: context.Background(), + config: sConfig, + } + + err = sv.Cleanup(sv.ctx) + assert.Nil(err) +} + +func TestStratovirtGetpids(t *testing.T) { + assert := assert.New(t) + + sv := &stratovirt{} + pids := sv.GetPids() + assert.NotNil(pids) + assert.True(len(pids) == 1) + assert.True(pids[0] == 0) +} + +func TestStratovirtBinPath(t *testing.T) { + assert := assert.New(t) + + f, err := os.CreateTemp("", "stratovirt") + assert.NoError(err) + defer func() { _ = f.Close() }() + defer func() { _ = os.Remove(f.Name()) }() + + expectedPath := f.Name() + sConfig, err := newStratovirtConfig() + assert.NoError(err) + + sConfig.HypervisorPath = expectedPath + sv := &stratovirt{ + config: sConfig, + } + + // get config hypervisor path + path, err := sv.binPath() + assert.NoError(err) + assert.Equal(path, expectedPath) + + // config hypervisor path does not exist + sv.config.HypervisorPath = "/abc/xyz/123" + path, err = sv.binPath() + assert.Error(err) + assert.Equal(path, "") + + // get default stratovirt hypervisor path + sv.config.HypervisorPath = "" + path, err = sv.binPath() + if _, errStat := os.Stat(path); os.IsNotExist(errStat) { + assert.Error(err) + assert.Equal(path, "") + } else { + assert.NoError(err) + assert.Equal(path, defaultStratoVirt) + } +} diff --git a/src/runtime/virtcontainers/types/sandbox.go b/src/runtime/virtcontainers/types/sandbox.go 
index 5149b0423297..29c909c977fb 100644 --- a/src/runtime/virtcontainers/types/sandbox.go +++ b/src/runtime/virtcontainers/types/sandbox.go @@ -7,6 +7,7 @@ package types import ( "fmt" + "net" "os" "strings" @@ -37,6 +38,7 @@ const ( HybridVSockScheme = "hvsock" MockHybridVSockScheme = "mock" VSockScheme = "vsock" + RemoteSockScheme = "remote" ) // SandboxState is a sandbox state structure @@ -210,6 +212,16 @@ func (s *HybridVSock) String() string { return fmt.Sprintf("%s://%s:%d", HybridVSockScheme, s.UdsPath, s.Port) } +type RemoteSock struct { + Conn net.Conn + SandboxID string + TunnelSocketPath string +} + +func (s *RemoteSock) String() string { + return fmt.Sprintf("%s://%s", RemoteSockScheme, s.TunnelSocketPath) +} + // MockHybridVSock defines a mock hybrid vsocket for tests only. type MockHybridVSock struct { UdsPath string diff --git a/src/runtime/virtcontainers/types/sandbox_test.go b/src/runtime/virtcontainers/types/sandbox_test.go index 05075e449592..54e2e78c494f 100644 --- a/src/runtime/virtcontainers/types/sandbox_test.go +++ b/src/runtime/virtcontainers/types/sandbox_test.go @@ -6,6 +6,8 @@ package types import ( + "os" + "path/filepath" "testing" "github.com/stretchr/testify/assert" @@ -163,6 +165,78 @@ func TestVolumesStringSuccessful(t *testing.T) { assert.Equal(t, result, expected) } +func TestStringFromVSock(t *testing.T) { + assert := assert.New(t) + + dir := t.TempDir() + + contextID := uint64(16187) + port := uint32(1024) + vsockFilename := filepath.Join(dir, "vsock") + + vsockFile, err := os.Create(vsockFilename) + assert.NoError(err) + defer vsockFile.Close() + + vsock := VSock{ + ContextID: contextID, + Port: port, + VhostFd: vsockFile, + } + + expected := "vsock://16187:1024" + + assert.Equal(vsock.String(), expected) +} + +func TestStringFromHybridVSock(t *testing.T) { + assert := assert.New(t) + + udsPath := "udspath" + contextID := uint64(16187) + port := uint32(1024) + + sock := HybridVSock{ + UdsPath: udsPath, + ContextID: contextID, 
+ Port: port, + } + + expected := "hvsock://udspath:1024" + + assert.Equal(sock.String(), expected) +} + +func TestStringFromRemoteSock(t *testing.T) { + assert := assert.New(t) + + sandboxID := "sandboxID" + tunnelSockerPath := "tunnelSocketPath" + + sock := RemoteSock{ + SandboxID: sandboxID, + TunnelSocketPath: tunnelSockerPath, + } + + expected := "remote://tunnelSocketPath" + + assert.Equal(sock.String(), expected) +} + +func TestStringFromMockHybridVSock(t *testing.T) { + assert := assert.New(t) + + udsPath := "udspath" + + sock := MockHybridVSock{ + UdsPath: udsPath, + } + + expected := "mock://udspath" + + assert.Equal(sock.String(), expected) +} + func TestSocketsSetSuccessful(t *testing.T) { sockets := &Sockets{} diff --git a/src/runtime/virtcontainers/types/virtual_volume.go b/src/runtime/virtcontainers/types/virtual_volume.go new file mode 100644 index 000000000000..2d93519b0194 --- /dev/null +++ b/src/runtime/virtcontainers/types/virtual_volume.go @@ -0,0 +1,156 @@ +package types + +import ( + "encoding/base64" + "encoding/hex" + "encoding/json" + "fmt" + "strings" + + "github.com/pkg/errors" +) + +const ( + minBlockSize = 1 << 9 + maxBlockSize = 1 << 19 +) + +const ( + KataVirtualVolumeDirectBlockType = "direct_block" + KataVirtualVolumeImageRawBlockType = "image_raw_block" + KataVirtualVolumeLayerRawBlockType = "layer_raw_block" + KataVirtualVolumeImageNydusBlockType = "image_nydus_block" + KataVirtualVolumeLayerNydusBlockType = "layer_nydus_block" + KataVirtualVolumeImageNydusFsType = "image_nydus_fs" + KataVirtualVolumeLayerNydusFsType = "layer_nydus_fs" + KataVirtualVolumeImageGuestPullType = "image_guest_pull" +) + +// DmVerityInfo contains configuration information for DmVerity device. 
+type DmVerityInfo struct { + HashType string `json:"hashtype"` + Hash string `json:"hash"` + BlockNum uint64 `json:"blocknum"` + Blocksize uint64 `json:"blocksize"` + Hashsize uint64 `json:"hashsize"` + Offset uint64 `json:"offset"` +} + +// DirectAssignedVolume contains meta information for a directly assigned volume. +type DirectAssignedVolume struct { + Metadata map[string]string `json:"metadata"` +} + +// ImagePullVolume contains meta information for pulling an image inside the guest. +type ImagePullVolume struct { + Metadata map[string]string `json:"metadata"` +} + +// NydusImageVolume contains Nydus image volume information. +type NydusImageVolume struct { + Config string `json:"config"` + SnapshotDir string `json:"snapshot_dir"` +} + +// KataVirtualVolume encapsulates information for extra mount options and direct volumes. +type KataVirtualVolume struct { + VolumeType string `json:"volume_type"` + Source string `json:"source,omitempty"` + FSType string `json:"fs_type,omitempty"` + Options []string `json:"options,omitempty"` + DirectVolume *DirectAssignedVolume `json:"direct_volume,omitempty"` + ImagePull *ImagePullVolume `json:"image_pull,omitempty"` + NydusImage *NydusImageVolume `json:"nydus_image,omitempty"` + DmVerity *DmVerityInfo `json:"dm_verity,omitempty"` +} + +func (d *DmVerityInfo) IsValid() error { + err := d.validateHashType() + if err != nil { + return err + } + + if d.BlockNum == 0 || d.BlockNum > uint64(^uint32(0)) { + return fmt.Errorf("Zero block count for DmVerity device %s", d.Hash) + } + + if !isValidBlockSize(d.Blocksize) || !isValidBlockSize(d.Hashsize) { + return fmt.Errorf("Unsupported verity block size: data_block_size = %d, hash_block_size = %d", d.Blocksize, d.Hashsize) + } + + if d.Offset%d.Hashsize != 0 || d.Offset < d.Blocksize*d.BlockNum { + return fmt.Errorf("Invalid hashvalue offset %d for DmVerity device %s", d.Offset, d.Hash) + } + + return nil +} + +func (d *DirectAssignedVolume) IsValid() bool { + return d.Metadata != 
nil +} + +func (i *ImagePullVolume) IsValid() bool { + return i.Metadata != nil +} + +func (n *NydusImageVolume) IsValid() bool { + return len(n.Config) > 0 || len(n.SnapshotDir) > 0 +} + +func (k *KataVirtualVolume) IsValid() bool { + return len(k.VolumeType) > 0 && + (k.DirectVolume == nil || k.DirectVolume.IsValid()) && + (k.ImagePull == nil || k.ImagePull.IsValid()) && + (k.NydusImage == nil || k.NydusImage.IsValid()) && + (k.DmVerity == nil || k.DmVerity.IsValid() == nil) +} + +func (d *DmVerityInfo) validateHashType() error { + switch strings.ToLower(d.HashType) { + case "sha256": + return d.isValidHash(64, "sha256") + case "sha1": + return d.isValidHash(40, "sha1") + default: + return fmt.Errorf("Unsupported hash algorithm %s for DmVerity device %s", d.HashType, d.Hash) + } +} + +func isValidBlockSize(blockSize uint64) bool { + return minBlockSize <= blockSize && blockSize <= maxBlockSize && blockSize&(blockSize-1) == 0 // within [minBlockSize, maxBlockSize] and a power of two +} + +func (d *DmVerityInfo) isValidHash(expectedLen int, hashType string) error { + _, err := hex.DecodeString(d.Hash) + if len(d.Hash) != expectedLen || err != nil { + return fmt.Errorf("Invalid hash value %s:%s for DmVerity device with %s", hashType, d.Hash, hashType) + } + return nil +} + +func ParseDmVerityInfo(option string) (*DmVerityInfo, error) { + no := &DmVerityInfo{} + if err := json.Unmarshal([]byte(option), no); err != nil { + return nil, errors.Wrapf(err, "DmVerityInfo json unmarshal err") + } + if err := no.IsValid(); err != nil { + return nil, fmt.Errorf("DmVerityInfo is not correct, %+v; error = %+v", no, err) + } + return no, nil +} + +func ParseKataVirtualVolume(option string) (*KataVirtualVolume, error) { + opt, err := base64.StdEncoding.DecodeString(option) + if err != nil { + return nil, errors.Wrap(err, "KataVirtualVolume base64 decoding err") + } + no := &KataVirtualVolume{} + if err := json.Unmarshal(opt, no); err != nil { + return nil, errors.Wrapf(err, "KataVirtualVolume json unmarshal err") + } + if !no.IsValid() { + return nil, 
fmt.Errorf("KataVirtualVolume is not correct, %+v", no) + } + + return no, nil +} diff --git a/src/runtime/virtcontainers/types/virtual_volume_test.go b/src/runtime/virtcontainers/types/virtual_volume_test.go new file mode 100644 index 000000000000..6acecf3f3454 --- /dev/null +++ b/src/runtime/virtcontainers/types/virtual_volume_test.go @@ -0,0 +1,246 @@ +package types + +import ( + "encoding/base64" + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestDmVerityInfoValidation(t *testing.T) { + TestData := []DmVerityInfo{ + { + HashType: "md5", // "md5" is not a supported hash algorithm + Blocksize: 512, + Hashsize: 512, + BlockNum: 16384, + Offset: 8388608, + Hash: "9de18652fe74edfb9b805aaed72ae2aa48f94333f1ba5c452ac33b1c39325174", + }, + { + HashType: "sha256", + Blocksize: 3000, // Invalid block size, not a power of 2. + Hashsize: 512, + BlockNum: 16384, + Offset: 8388608, + Hash: "9de18652fe74edfb9b805aaed72ae2aa48f94333f1ba5c452ac33b1c39325174", + }, + { + HashType: "sha256", + Blocksize: 0, // Invalid block size, less than 512. + Hashsize: 512, + BlockNum: 16384, + Offset: 8388608, + Hash: "9de18652fe74edfb9b805aaed72ae2aa48f94333f1ba5c452ac33b1c39325174", + }, + { + HashType: "sha256", + Blocksize: 524800, // Invalid block size, greater than 524288. + Hashsize: 512, + BlockNum: 16384, + Offset: 8388608, + Hash: "9de18652fe74edfb9b805aaed72ae2aa48f94333f1ba5c452ac33b1c39325174", + }, + { + HashType: "sha256", + Blocksize: 512, + Hashsize: 3000, // Invalid hash block size, not a power of 2. + BlockNum: 16384, + Offset: 8388608, + Hash: "9de18652fe74edfb9b805aaed72ae2aa48f94333f1ba5c452ac33b1c39325174", + }, + { + HashType: "sha256", + Blocksize: 512, + Hashsize: 0, // Invalid hash block size, less than 512. 
+ BlockNum: 16384, + Offset: 8388608, + Hash: "9de18652fe74edfb9b805aaed72ae2aa48f94333f1ba5c452ac33b1c39325174", + }, + { + HashType: "sha256", + Blocksize: 512, + Hashsize: 524800, // Invalid hash block size, greater than 524288. + BlockNum: 16384, + Offset: 8388608, + Hash: "9de18652fe74edfb9b805aaed72ae2aa48f94333f1ba5c452ac33b1c39325174", + }, + { + HashType: "sha256", + Blocksize: 512, + Hashsize: 512, + BlockNum: 0, // Invalid BlockNum, it must be greater than 0. + Offset: 8388608, + Hash: "9de18652fe74edfb9b805aaed72ae2aa48f94333f1ba5c452ac33b1c39325174", + }, + { + HashType: "sha256", + Blocksize: 512, + Hashsize: 512, + BlockNum: 16384, + Offset: 0, // Invalid offset, it must be greater than 0. + Hash: "9de18652fe74edfb9b805aaed72ae2aa48f94333f1ba5c452ac33b1c39325174", + }, + { + HashType: "sha256", + Blocksize: 512, + Hashsize: 512, + BlockNum: 16384, + Offset: 8193, // Invalid offset, it must be aligned to 512. + Hash: "9de18652fe74edfb9b805aaed72ae2aa48f94333f1ba5c452ac33b1c39325174", + }, + { + HashType: "sha256", + Blocksize: 512, + Hashsize: 512, + BlockNum: 16384, + Offset: 8388608 - 4096, // Invalid offset, it must be equal to blocksize * BlockNum. 
+ Hash: "9de18652fe74edfb9b805aaed72ae2aa48f94333f1ba5c452ac33b1c39325174", + }, + } + + for _, d := range TestData { + assert.Error(t, d.IsValid()) + } + TestCorrectData := DmVerityInfo{ + HashType: "sha256", + Blocksize: 512, + Hashsize: 512, + BlockNum: 16384, + Offset: 8388608, + Hash: "9de18652fe74edfb9b805aaed72ae2aa48f94333f1ba5c452ac33b1c39325174", + } + assert.NoError(t, TestCorrectData.IsValid()) +} + +func TestDirectAssignedVolumeValidation(t *testing.T) { + validDirectVolume := DirectAssignedVolume{ + Metadata: map[string]string{"key": "value"}, + } + assert.True(t, validDirectVolume.IsValid()) + + invalidDirectVolume := DirectAssignedVolume{ + Metadata: nil, + } + assert.False(t, invalidDirectVolume.IsValid()) +} + +func TestImagePullVolumeValidation(t *testing.T) { + validImagePull := ImagePullVolume{ + Metadata: map[string]string{"key": "value"}, + } + assert.True(t, validImagePull.IsValid()) + + invalidImagePull := ImagePullVolume{ + Metadata: nil, + } + assert.False(t, invalidImagePull.IsValid()) +} + +func TestNydusImageVolumeValidation(t *testing.T) { + validNydusImage := NydusImageVolume{ + Config: "config_value", + SnapshotDir: "", + } + assert.True(t, validNydusImage.IsValid()) + + invalidNydusImage := NydusImageVolume{ + Config: "", + SnapshotDir: "", + } + assert.False(t, invalidNydusImage.IsValid()) +} + +func TestKataVirtualVolumeValidation(t *testing.T) { + validKataVirtualVolume := KataVirtualVolume{ + VolumeType: "direct_block", + Source: "/dev/sdb", + FSType: "ext4", + Options: []string{"rw"}, + DirectVolume: &DirectAssignedVolume{ + Metadata: map[string]string{"key": "value"}, + }, + // Initialize other fields + } + assert.True(t, validKataVirtualVolume.IsValid()) + + invalidKataVirtualVolume := KataVirtualVolume{ + VolumeType: "direct_block", + Source: "/dev/sdb", + FSType: "", + Options: nil, + DirectVolume: &DirectAssignedVolume{ + Metadata: nil, + }, + // Initialize other fields + } + assert.False(t, 
invalidKataVirtualVolume.IsValid()) +} + +func TestParseDmVerityInfo(t *testing.T) { + // Create a valid DmVerityInfo and serialize it to JSON + validDmVerityInfo := DmVerityInfo{ + HashType: "sha256", + Blocksize: 512, + Hashsize: 512, + BlockNum: 16384, + Offset: 8388608, + Hash: "9de18652fe74edfb9b805aaed72ae2aa48f94333f1ba5c452ac33b1c39325174", + } + validKataVirtualVolumeJSON, _ := json.Marshal(validDmVerityInfo) + + t.Run("Valid Option", func(t *testing.T) { + volume, err := ParseDmVerityInfo(string(validKataVirtualVolumeJSON)) + assert.NoError(t, err) + assert.NotNil(t, volume) + assert.NoError(t, volume.IsValid()) + }) + + t.Run("Invalid JSON Option", func(t *testing.T) { + volume, err := ParseDmVerityInfo("invalid_json") + assert.Error(t, err) + assert.Nil(t, volume) + }) + +} + +func TestParseKataVirtualVolume(t *testing.T) { + // Create a mock valid KataVirtualVolume + validKataVirtualVolume := KataVirtualVolume{ + VolumeType: "direct_block", + Source: "/dev/sdb", + FSType: "ext4", + Options: []string{"rw"}, + DirectVolume: &DirectAssignedVolume{ + Metadata: map[string]string{"key": "value"}, + }, + // Initialize other fields + } + validKataVirtualVolumeJSON, _ := json.Marshal(validKataVirtualVolume) + validOption := base64.StdEncoding.EncodeToString(validKataVirtualVolumeJSON) + + t.Run("Valid Option", func(t *testing.T) { + volume, err := ParseKataVirtualVolume(validOption) + + assert.NoError(t, err) + assert.NotNil(t, volume) + assert.True(t, volume.IsValid()) + }) + + t.Run("Invalid JSON Option", func(t *testing.T) { + invalidJSONOption := base64.StdEncoding.EncodeToString([]byte("invalid_json")) + volume, err := ParseKataVirtualVolume(invalidJSONOption) + + assert.Error(t, err) + assert.Nil(t, volume) + }) + + invalidBase64Option := "invalid_base64" + t.Run("Invalid Base64 Option", func(t *testing.T) { + volume, err := ParseKataVirtualVolume(invalidBase64Option) + + assert.Error(t, err) + assert.Nil(t, volume) + }) +} diff --git 
a/src/runtime/virtcontainers/virtcontainers_test.go b/src/runtime/virtcontainers/virtcontainers_test.go index 0a269b8b9315..f366558d5009 100644 --- a/src/runtime/virtcontainers/virtcontainers_test.go +++ b/src/runtime/virtcontainers/virtcontainers_test.go @@ -48,6 +48,10 @@ var testAcrnKernelPath = "" var testAcrnImagePath = "" var testAcrnPath = "" var testAcrnCtlPath = "" +var testStratovirtKernelPath = "" +var testStratovirtImagePath = "" +var testStratovirtInitrdPath = "" +var testStratovirtPath = "" var testVirtiofsdPath = "" var testHyperstartCtlSocket = "" @@ -89,6 +93,18 @@ func setupClh() { } } +func setupStratovirt() { + os.Mkdir(filepath.Join(testDir, testBundle), DirMode) + + for _, filename := range []string{testStratovirtKernelPath, testStratovirtInitrdPath, testStratovirtPath, testVirtiofsdPath} { + _, err := os.Create(filename) + if err != nil { + fmt.Printf("Could not recreate %s:%v", filename, err) + os.Exit(1) + } + } +} + // TestMain is the common main function used by ALL the test functions // for this package. func TestMain(m *testing.M) { @@ -149,6 +165,13 @@ func TestMain(m *testing.M) { setupClh() + testStratovirtKernelPath = filepath.Join(testDir, testBundle, testKernel) + testStratovirtImagePath = filepath.Join(testDir, testBundle, testInitrd) + testStratovirtInitrdPath = filepath.Join(testDir, testBundle, testInitrd) + testStratovirtPath = filepath.Join(testDir, testBundle, testHypervisor) + + setupStratovirt() + // set now that configStoragePath has been overridden. 
sandboxDirState = filepath.Join(fs.MockRunStoragePath(), testSandboxID) diff --git a/tests/common.bash b/tests/common.bash index b3f8ac10b3c2..9e9025bbf489 100644 --- a/tests/common.bash +++ b/tests/common.bash @@ -223,6 +223,13 @@ function restart_systemd_service_with_no_burst_limit() { local unit_file=$(systemctl show "$service.service" -p FragmentPath | cut -d'=' -f2) [ -f "$unit_file" ] || { warn "Can't find $service's unit file: $unit_file"; return 1; } + # If the unit file is in /lib, copy it to /etc + if [[ $unit_file == /lib* ]]; then + tmp_unit_file="/etc/${unit_file#*lib/}" + sudo cp "$unit_file" "$tmp_unit_file" + unit_file="$tmp_unit_file" + fi + local start_burst_set=$(sudo grep StartLimitBurst $unit_file | wc -l) if [ "$start_burst_set" -eq 0 ] then diff --git a/tests/integration/cri-containerd/gha-run.sh b/tests/integration/cri-containerd/gha-run.sh index 542964423837..6181deede7f0 100755 --- a/tests/integration/cri-containerd/gha-run.sh +++ b/tests/integration/cri-containerd/gha-run.sh @@ -35,7 +35,7 @@ function install_dependencies() { sudo apt-get -y install "${system_deps[@]}" ensure_yq - ${repo_root_dir}/tests/install_go.sh -p + ${repo_root_dir}/tests/install_go.sh -p -f # Dependency list of projects that we can install them # directly from their releases on GitHub: diff --git a/tests/integration/cri-containerd/integration-tests.sh b/tests/integration/cri-containerd/integration-tests.sh index 58d28dcb71f3..a58e7657a22c 100755 --- a/tests/integration/cri-containerd/integration-tests.sh +++ b/tests/integration/cri-containerd/integration-tests.sh @@ -215,7 +215,7 @@ function testContainerStop() { } function TestKilledVmmCleanup() { - if [[ "${KATA_HYPERVISOR}" != "qemu" ]]; then + if [[ "${KATA_HYPERVISOR}" != "qemu" ]] || [[ "${ARCH}" == "ppc64le" ]]; then info "TestKilledVmmCleanup is skipped for ${KATA_HYPERVISOR}, only QEMU is currently tested" return 0 fi diff --git a/tests/integration/kubernetes/filter_k8s_test.sh 
b/tests/integration/kubernetes/filter_k8s_test.sh index 2b90076d9cdf..cc2017aef46c 100755 --- a/tests/integration/kubernetes/filter_k8s_test.sh +++ b/tests/integration/kubernetes/filter_k8s_test.sh @@ -8,15 +8,12 @@ set -o errexit set -o nounset set -o pipefail -GOPATH_LOCAL="${GOPATH%%:*}" -KATA_DIR="${GOPATH_LOCAL}/src/github.com/kata-containers" -TEST_DIR="${KATA_DIR}/tests" -CI_DIR="${TEST_DIR}/.ci" +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +repo_root_dir="$(cd "${script_dir}/../../.." && pwd)" +GOPATH_LOCAL="${GOPATH%%:*}" K8S_FILTER_FLAG="kubernetes" -source "${CI_DIR}/lib.sh" - main() { local K8S_CONFIG_FILE="$1" @@ -25,8 +22,9 @@ main() mapfile -d " " -t _K8S_TEST_UNION <<< "${K8S_TEST_UNION}" - # install yq if not exist - ${CI_DIR}/install_yq.sh > /dev/null + if [ ! -f ${GOPATH_LOCAL}/bin/yq ]; then + ${repo_root_dir}/ci/install_yq.sh > /dev/null + fi local K8S_SKIP_UNION=$("${GOPATH_LOCAL}/bin/yq" read "${K8S_CONFIG_FILE}" "${K8S_FILTER_FLAG}") [ "${K8S_SKIP_UNION}" == "null" ] && return diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index b9ab94098479..d215ea47fb7e 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -252,6 +252,7 @@ function main() { deploy-kata-snp) deploy_kata "snp" ;; deploy-kata-tdx) deploy_kata "tdx" ;; deploy-kata-garm) deploy_kata "garm" ;; + deploy-kata-zvsi) deploy_kata "zvsi" ;; run-tests) run_tests ;; run-tests-kcli) run_tests "kcli" ;; cleanup-kcli) cleanup "kcli" ;; @@ -259,6 +260,7 @@ function main() { cleanup-snp) cleanup "snp" ;; cleanup-tdx) cleanup "tdx" ;; cleanup-garm) cleanup "garm" ;; + cleanup-zvsi) cleanup "zvsi" ;; delete-cluster) cleanup "aks" ;; delete-cluster-kcli) delete_cluster_kcli ;; *) >&2 echo "Invalid argument"; exit 2 ;; diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-stratovirt-kata-metric8.toml 
b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-stratovirt-kata-metric8.toml new file mode 100644 index 000000000000..b5ad92ca1935 --- /dev/null +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-stratovirt-kata-metric8.toml @@ -0,0 +1,162 @@ +# Copyright (c) 2023 Huawei Technologies Co.,Ltd. +# +# SPDX-License-Identifier: Apache-2.0 +# +# This file contains baseline expectations +# for checked results by checkmetrics tool. + +[[metric]] +name = "boot-times" +type = "json" +description = "measure container lifecycle timings" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"boot-times\".Results | .[] | .\"to-workload\".Result" +checktype = "mean" +midval = 0.62 +minpercent = 40.0 +maxpercent = 40.0 + +[[metric]] +name = "memory-footprint" +type = "json" +description = "measure memory usage" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"memory-footprint\".Results | .[] | .average.Result" +checktype = "mean" +midval = 129842.10 +minpercent = 30.0 +maxpercent = 30.0 + +[[metric]] +name = "memory-footprint-inside-container" +type = "json" +description = "measure memory inside the container" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"memory-footprint-inside-container\".Results | .[] | .memtotal.Result" +checktype = "mean" +midval = 2040568.0 +minpercent = 30.0 +maxpercent = 30.0 + +[[metric]] +name = "blogbench" +type = "json" +description = "measure container average of blogbench write" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"blogbench\".Results | .[] | .write.Result" +checktype = "mean" +midval = 603.0 +minpercent = 30.0 +maxpercent = 30.0 + +[[metric]] +name = "blogbench" +type = "json" +description = 
"measure container average of blogbench read" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"blogbench\".Results | .[] | .read.Result" +checktype = "mean" +midval = 37669.0 +minpercent = 30.0 +maxpercent = 30.0 + +[[metric]] +name = "tensorflow_nhwc" +type = "json" +description = "tensorflow resnet model" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"tensorflow_nhwc\".Results | .[] | .resnet.Result" +checktype = "mean" +midval = 2025.0 +minpercent = 30.0 +maxpercent = 30.0 + +[[metric]] +name = "tensorflow_nhwc" +type = "json" +description = "tensorflow alexnet model" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"tensorflow_nhwc\".Results | .[] | .alexnet.Result" +checktype = "mean" +midval = 75.0 +minpercent = 30.0 +maxpercent = 30.0 + +[[metric]] +name = "latency" +type = "json" +description = "measure container latency" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"latency\".Results | .[] | .latency.Result" +checktype = "mean" +midval = 0.78 +minpercent = 30.0 +maxpercent = 30.0 + +[[metric]] +name = "network-iperf3" +type = "json" +description = "measure container cpu utilization using iperf3" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"network-iperf3\".Results | .[] | .cpu.Result" +checktype = "mean" +midval = 60.10 +minpercent = 30.0 +maxpercent = 30.0 + +[[metric]] +name = "network-iperf3" +type = "json" +description = "measure container bandwidth using iperf3" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"network-iperf3\".Results | .[] | .bandwidth.Result" 
+checktype = "mean" +midval = 19959440840.94 +minpercent = 30.0 +maxpercent = 30.0 + +[[metric]] +name = "network-iperf3" +type = "json" +description = "measure container parallel bandwidth using iperf3" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"network-iperf3\".Results | .[] | .parallel.Result" +checktype = "mean" +midval = 25487333685.04 +minpercent = 30.0 +maxpercent = 30.0 + +[[metric]] +name = "network-iperf3" +type = "json" +description = "iperf" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"network-iperf3\".Results | .[] | .jitter.Result" +checktype = "mean" +midval = 0.038 +minpercent = 40.0 +maxpercent = 40.0 diff --git a/tests/metrics/machine_learning/tensorflow_resnet50_fp32.sh b/tests/metrics/machine_learning/tensorflow_resnet50_fp32.sh index 10abbd38138c..73ab0a2b226c 100755 --- a/tests/metrics/machine_learning/tensorflow_resnet50_fp32.sh +++ b/tests/metrics/machine_learning/tensorflow_resnet50_fp32.sh @@ -28,8 +28,8 @@ CMD="$dst_dir/$start_script" guest_trigger_file="$dst_dir/$trigger_file" host_trigger_file="$src_dir/$trigger_file" INITIAL_NUM_PIDS=1 -CMD_FILE="cat results | grep 'Average Throughput' | wc -l" -CMD_RESULTS="cat results | grep 'Average Throughput' | cut -d':' -f2 | cut -d' ' -f2 | tr '\n' ','" +CMD_FILE="cat results | grep 'Throughput' | wc -l" +CMD_RESULTS="cat results | grep 'Throughput' | cut -d':' -f2 | cut -d' ' -f2 | tr '\n' ','" function remove_tmp_file() { rm -rf "${tensorflow_file}" diff --git a/tests/metrics/network/README.md b/tests/metrics/network/README.md index 971aff6ae350..1d64f4ff59f6 100644 --- a/tests/metrics/network/README.md +++ b/tests/metrics/network/README.md @@ -12,6 +12,7 @@ bandwidth, jitter, latency and parallel bandwidth. - `k8s-network-metrics-iperf3.sh` measures bandwidth which is the speed of the data transfer. 
- `latency-network.sh` measures network latency. - `nginx-network.sh` is a benchmark of the lightweight Nginx HTTPS web-server and measures the HTTP requests over a fixed period of time with a configurable number of concurrent clients/connections. +- `k8s-network-metrics-iperf3-udp.sh` measures `UDP` bandwidth and parallel bandwidth which is the speed of the data transfer. ## Running the tests diff --git a/tests/stability/gha-run.sh b/tests/stability/gha-run.sh index 50561de33e87..23e495e4449c 100755 --- a/tests/stability/gha-run.sh +++ b/tests/stability/gha-run.sh @@ -18,20 +18,15 @@ function install_dependencies() { declare -a system_deps=( jq + curl + gnupg ) sudo apt-get update sudo apt-get -y install "${system_deps[@]}" ensure_yq - - declare -a github_deps - github_deps[0]="cri_containerd:$(get_from_kata_deps "externals.containerd.${CONTAINERD_VERSION}")" - - for github_dep in "${github_deps[@]}"; do - IFS=":" read -r -a dep <<< "${github_dep}" - install_${dep[0]} "${dep[1]}" - done + install_docker } function run() { @@ -40,6 +35,9 @@ function run() { export ITERATIONS=2 MAX_CONTAINERS=20 bash "${stability_dir}/soak_parallel_rm.sh" + info "Running stressng scability test using ${KATA_HYPERVISOR} hypervisor" + bash "${stability_dir}/stressng.sh" + info "Running scability test using ${KATA_HYPERVISOR} hypervisor" bash "${stability_dir}/scability_test.sh" 15 60 diff --git a/tests/stability/stressng.sh b/tests/stability/stressng.sh index 9f95b8c68fa3..cfd7ac71fc1d 100755 --- a/tests/stability/stressng.sh +++ b/tests/stability/stressng.sh @@ -33,16 +33,6 @@ function main() { MEMORY_CMD="stress-ng --cpu 2 --vm 4 -t 5m" sudo -E ctr t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${MEMORY_CMD}" - # Run shared memory stressors - info "Running 8 shared memory stressors" - SHARED_CMD="stress-ng --shm 0" - sudo -E ctr t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${SHARED_CMD}" - - # Run all stressors one by one on all CPUs - info "Running all 
stressors one by one" - STRESSORS_CMD="stress-ng --seq 0 -t 10 --tz -v" - sudo -E ctr t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${STRESSORS_CMD}" - # Test floating point on CPU for 60 seconds info "Running floating tests on CPU" FLOAT_CMD="stress-ng --matrix 1 -t 1m" @@ -50,7 +40,7 @@ function main() { # Runs two instances of the CPU stressors, one instance of the matrix info "Running instances of the CPU stressors" - INSTANCE_CMD='stress-ng --cpu 2 --matrix 1 --mq 3 -t 5m' + INSTANCE_CMD='stress-ng --cpu 2 --matrix 1 --mq 3 -t 3m' sudo -E ctr t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${INSTANCE_CMD}" clean_env_ctr diff --git a/tools/packaging/kata-deploy/examples/nginx-deployment-stratovirt.yaml b/tools/packaging/kata-deploy/examples/nginx-deployment-stratovirt.yaml new file mode 100644 index 000000000000..41f9a8a403ae --- /dev/null +++ b/tools/packaging/kata-deploy/examples/nginx-deployment-stratovirt.yaml @@ -0,0 +1,20 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx-deployment-stratovirt +spec: + selector: + matchLabels: + app: nginx + replicas: 2 + template: + metadata: + labels: + app: nginx + spec: + runtimeClassName: kata-stratovirt + containers: + - name: nginx + image: nginx:1.14 + ports: + - containerPort: 80 diff --git a/tools/packaging/kata-deploy/examples/test-deploy-kata-stratovirt.yaml b/tools/packaging/kata-deploy/examples/test-deploy-kata-stratovirt.yaml new file mode 100644 index 000000000000..dcb3517ce171 --- /dev/null +++ b/tools/packaging/kata-deploy/examples/test-deploy-kata-stratovirt.yaml @@ -0,0 +1,42 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + run: php-apache-kata-stratovirt + name: php-apache-kata-stratovirt +spec: + replicas: 1 + selector: + matchLabels: + run: php-apache-kata-stratovirt + template: + metadata: + labels: + run: php-apache-kata-stratovirt + spec: + runtimeClassName: kata-stratovirt + containers: + - image: k8s.gcr.io/hpa-example + 
imagePullPolicy: Always + name: php-apache + ports: + - containerPort: 80 + protocol: TCP + resources: + requests: + cpu: 200m + restartPolicy: Always +--- +apiVersion: v1 +kind: Service +metadata: + name: php-apache-kata-stratovirt +spec: + ports: + - port: 80 + protocol: TCP + targetPort: 80 + selector: + run: php-apache-kata-stratovirt + sessionAffinity: None + type: ClusterIP diff --git a/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml b/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml index 7cb8756891e3..df7b715a9e5e 100644 --- a/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml +++ b/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml @@ -30,7 +30,7 @@ spec: - name: DEBUG value: "false" - name: SHIMS - value: "clh dragonball fc qemu-nvidia-gpu qemu-sev qemu-snp qemu-tdx qemu" + value: "clh dragonball fc qemu-nvidia-gpu qemu-sev qemu-snp qemu-tdx qemu remote stratovirt" - name: DEFAULT_SHIM value: "qemu" - name: CREATE_RUNTIMECLASSES diff --git a/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml b/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml index 8dde8962e7ec..03cbe4a53e51 100644 --- a/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml +++ b/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml @@ -32,7 +32,7 @@ spec: - name: DEBUG value: "false" - name: SHIMS - value: "clh dragonball fc qemu qemu-nvidia-gpu qemu-sev qemu-snp qemu-tdx" + value: "clh dragonball fc qemu qemu-nvidia-gpu qemu-sev qemu-snp qemu-tdx remote stratovirt" - name: DEFAULT_SHIM value: "qemu" - name: CREATE_RUNTIMECLASSES diff --git a/tools/packaging/kata-deploy/local-build/Makefile b/tools/packaging/kata-deploy/local-build/Makefile index db9218ac22b0..0d64cd4cb772 100644 --- a/tools/packaging/kata-deploy/local-build/Makefile +++ b/tools/packaging/kata-deploy/local-build/Makefile @@ -35,6 +35,7 @@ all: serial-targets \ qemu-snp-experimental-tarball \ qemu-tarball \ 
qemu-tdx-experimental-tarball \ + stratovirt-tarball \ shim-v2-tarball \ tdvf-tarball \ virtiofsd-tarball @@ -115,6 +116,9 @@ qemu-tarball: qemu-tdx-experimental-tarball: ${MAKE} $@-build +stratovirt-tarball: + ${MAKE} $@-build + rootfs-image-tarball: ${MAKE} $@-build diff --git a/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile b/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile index a29514968b2e..6b99f6cf107b 100644 --- a/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile +++ b/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile @@ -11,6 +11,19 @@ COPY install_oras.sh /usr/bin/install_oras.sh SHELL ["/bin/bash", "-o", "pipefail", "-c"] +#FIXME: gcc is required as agent is build out of a container build. +RUN apt-get update && \ + apt-get install --no-install-recommends -y \ + build-essential \ + cpio \ + gcc \ + git \ + make \ + unzip \ + wget \ + xz-utils && \ + apt-get clean && rm -rf /var/lib/apt/lists + # Install yq, oras, and docker RUN apt-get update && \ apt-get install -y --no-install-recommends \ @@ -37,18 +50,6 @@ RUN if [ ${IMG_USER} != "root" ] && [ ! -z ${HOST_DOCKER_GID} ]; then groupadd - RUN if [ ${IMG_USER} != "root" ] && [ ! -z ${HOST_DOCKER_GID} ]; then usermod -a -G docker_on_host ${IMG_USER};fi RUN sh -c "echo '${IMG_USER} ALL=NOPASSWD: ALL' >> /etc/sudoers" -#FIXME: gcc is required as agent is build out of a container build. 
-RUN apt-get update && \ - apt-get install --no-install-recommends -y \ - build-essential \ - cpio \ - gcc \ - git \ - make \ - unzip \ - wget \ - xz-utils && \ - apt-get clean && rm -rf /var/lib/apt/lists ENV USER ${IMG_USER} USER ${IMG_USER} diff --git a/tools/packaging/kata-deploy/local-build/dockerbuild/install_oras.sh b/tools/packaging/kata-deploy/local-build/dockerbuild/install_oras.sh index 973a10205075..cf7eb4c45205 100755 --- a/tools/packaging/kata-deploy/local-build/dockerbuild/install_oras.sh +++ b/tools/packaging/kata-deploy/local-build/dockerbuild/install_oras.sh @@ -29,7 +29,17 @@ fi arch=$(uname -m) if [ "${arch}" = "ppc64le" ]; then - echo "An ORAS release for ppc64le is not available yet." + echo "Using oras from native builds" + #install go first + wget https://go.dev/dl/go1.21.1.linux-ppc64le.tar.gz + rm -rf /usr/local/go && tar -C /usr/local -xzf go1.21.1.linux-ppc64le.tar.gz + export PATH=$PATH:/usr/local/go/bin + go version + + git clone https://github.com/oras-project/oras.git + cd oras && make build-linux-ppc64le + cp bin/linux/ppc64le/oras ${install_dest} + make exit 0 fi if [ "${arch}" = "x86_64" ]; then diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh index fcbade01135b..cb8bb449fb4e 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh @@ -30,6 +30,7 @@ readonly kernel_builder="${static_build_dir}/kernel/build.sh" readonly ovmf_builder="${static_build_dir}/ovmf/build.sh" readonly qemu_builder="${static_build_dir}/qemu/build-static-qemu.sh" readonly qemu_experimental_builder="${static_build_dir}/qemu/build-static-qemu-experimental.sh" +readonly stratovirt_builder="${static_build_dir}/stratovirt/build-static-stratovirt.sh" readonly shimv2_builder="${static_build_dir}/shim-v2/build.sh" readonly virtiofsd_builder="${static_build_dir}/virtiofsd/build.sh" 
readonly nydus_builder="${static_build_dir}/nydus/build.sh" @@ -104,6 +105,7 @@ options: qemu qemu-snp-experimental qemu-tdx-experimental + stratovirt rootfs-image rootfs-image-tdx rootfs-initrd @@ -515,6 +517,28 @@ install_clh_glibc() { install_clh_helper "gnu" "${features}" "-glibc" } +# Install static stratovirt asset +install_stratovirt() { + local stratovirt_version=$(get_from_kata_deps "assets.hypervisor.stratovirt.version") + + latest_artefact="${stratovirt_version}" + latest_builder_image="" + + install_cached_tarball_component \ + "stratovirt" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ + "${final_tarball_name}" \ + "${final_tarball_path}" \ + && return 0 + + info "build static stratovirt" + "${stratovirt_builder}" + info "Install static stratovirt" + mkdir -p "${destdir}/opt/kata/bin/" + sudo install -D --owner root --group root --mode 0744 static-stratovirt/stratovirt "${destdir}/opt/kata/bin/stratovirt" +} + # Install static virtiofsd asset install_virtiofsd() { latest_artefact="$(get_from_kata_deps "externals.virtiofsd.version")-$(get_from_kata_deps "externals.virtiofsd.toolchain")" @@ -742,6 +766,7 @@ handle_build() { install_qemu install_qemu_snp_experimental install_qemu_tdx_experimental + install_stratovirt install_runk install_shimv2 install_tdvf @@ -791,6 +816,8 @@ handle_build() { qemu-tdx-experimental) install_qemu_tdx_experimental ;; + stratovirt) install_stratovirt ;; + rootfs-image) install_image ;; rootfs-image-tdx) install_image_tdx ;; @@ -837,7 +864,7 @@ handle_build() { echo "${ARTEFACT_REGISTRY_PASSWORD}" | sudo oras login "${ARTEFACT_REGISTRY}" -u "${ARTEFACT_REGISTRY_USERNAME}" --password-stdin - sudo oras push ${ARTEFACT_REGISTRY}/kata-containers/cached-artefacts/${build_target}:latest-${TARGET_BRANCH}-$(uname -m) ${final_tarball_name} ${build_target}-version ${build_target}-builder-image-version ${build_target}-sha256sum + sudo oras push ${ARTEFACT_REGISTRY}/kata-containers/cached-artefacts/${build_target}:latest-${TARGET_BRANCH}-$(uname
-m) ${final_tarball_name} ${build_target}-version ${build_target}-builder-image-version ${build_target}-sha256sum sudo oras logout "${ARTEFACT_REGISTRY}" fi @@ -871,6 +898,7 @@ main() { log-parser-rs nydus qemu + stratovirt rootfs-image rootfs-initrd rootfs-initrd-mariner diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh index af746e79c5ba..3d091136cf01 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh @@ -13,7 +13,7 @@ set -o errtrace KATA_DEPLOY_DIR="`dirname ${0}`/../../kata-deploy" KATA_DEPLOY_ARTIFACT="${1:-"kata-static.tar.xz"}" -REGISTRY="${2:-"quay.io/kata-containers/kata-deploy"}" +REGISTRY="${2:-"quay.io/kata-containers/kata-deploy"}" TAG="${3:-}" echo "Copying ${KATA_DEPLOY_ARTIFACT} to ${KATA_DEPLOY_DIR}" diff --git a/tools/packaging/kata-deploy/runtimeclasses/kata-remote.yaml b/tools/packaging/kata-deploy/runtimeclasses/kata-remote.yaml new file mode 100644 index 000000000000..242b2c509ea1 --- /dev/null +++ b/tools/packaging/kata-deploy/runtimeclasses/kata-remote.yaml @@ -0,0 +1,13 @@ +--- +kind: RuntimeClass +apiVersion: node.k8s.io/v1 +metadata: + name: kata-remote +handler: kata-remote +overhead: + podFixed: + memory: "120Mi" + cpu: "250m" +scheduling: + nodeSelector: + katacontainers.io/kata-runtime: "true" diff --git a/tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml b/tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml index e67cafe10614..8736ad6325dd 100644 --- a/tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml +++ b/tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml @@ -102,3 +102,29 @@ overhead: scheduling: nodeSelector: katacontainers.io/kata-runtime: "true" +--- +kind: RuntimeClass +apiVersion: node.k8s.io/v1 +metadata: + name: kata-remote
+handler: kata-remote +overhead: + podFixed: + memory: "120Mi" + cpu: "250m" +scheduling: + nodeSelector: + katacontainers.io/kata-runtime: "true" +--- +kind: RuntimeClass +apiVersion: node.k8s.io/v1 +metadata: + name: kata-stratovirt +handler: kata-stratovirt +overhead: + podFixed: + memory: "130Mi" + cpu: "250m" +scheduling: + nodeSelector: + katacontainers.io/kata-runtime: "true" diff --git a/tools/packaging/kata-deploy/runtimeclasses/kata-stratovirt.yaml b/tools/packaging/kata-deploy/runtimeclasses/kata-stratovirt.yaml new file mode 100644 index 000000000000..ea27046d9c98 --- /dev/null +++ b/tools/packaging/kata-deploy/runtimeclasses/kata-stratovirt.yaml @@ -0,0 +1,13 @@ +--- +kind: RuntimeClass +apiVersion: node.k8s.io/v1 +metadata: + name: kata-stratovirt +handler: kata-stratovirt +overhead: + podFixed: + memory: "130Mi" + cpu: "250m" +scheduling: + nodeSelector: + katacontainers.io/kata-runtime: "true" diff --git a/tools/packaging/kata-deploy/scripts/kata-deploy.sh b/tools/packaging/kata-deploy/scripts/kata-deploy.sh index b0ce40e60752..178a33279c54 100755 --- a/tools/packaging/kata-deploy/scripts/kata-deploy.sh +++ b/tools/packaging/kata-deploy/scripts/kata-deploy.sh @@ -16,6 +16,7 @@ containerd_conf_file_backup="${containerd_conf_file}.bak" IFS=' ' read -a shims <<< "$SHIMS" default_shim="$DEFAULT_SHIM" +ALLOWED_HYPERVISOR_ANNOTATIONS="${ALLOWED_HYPERVISOR_ANNOTATIONS:-}" IFS=' ' read -a non_formatted_allowed_hypervisor_annotations <<< "$ALLOWED_HYPERVISOR_ANNOTATIONS" allowed_hypervisor_annotations="" diff --git a/tools/packaging/kernel/kata_config_version b/tools/packaging/kernel/kata_config_version index 5bc6609e3d85..415196e47f53 100644 --- a/tools/packaging/kernel/kata_config_version +++ b/tools/packaging/kernel/kata_config_version @@ -1 +1 @@ -117 +118 diff --git a/tools/packaging/kernel/patches/6.1.x/0005-erofs-support-flattened-block-device-for-multi-blob-.patch 
b/tools/packaging/kernel/patches/6.1.x/0005-erofs-support-flattened-block-device-for-multi-blob-.patch new file mode 100644 index 000000000000..6206d771083d --- /dev/null +++ b/tools/packaging/kernel/patches/6.1.x/0005-erofs-support-flattened-block-device-for-multi-blob-.patch @@ -0,0 +1,106 @@ +From 8b465fecc35a434e61728a6184d188c6daa37a5d Mon Sep 17 00:00:00 2001 +From: Jia Zhu +Date: Thu, 2 Mar 2023 15:17:51 +0800 +Subject: [PATCH] erofs: support flattened block device for multi-blob images + +In order to support mounting multi-blobs container image as a single +block device, add flattened block device feature for EROFS. + +In this mode, all meta/data contents will be mapped into one block +space. User could compose a block device(by nbd/ublk/virtio-blk/ +vhost-user-blk) from multiple sources and mount the block device by +EROFS directly. It can reduce the number of block devices used, and +it's also benefits in both VM file passthrough and distributed storage +scenarios. + +You can test this using the method mentioned by: +https://github.com/dragonflyoss/image-service/pull/1139 +1. Compose a (nbd)block device from multi-blobs. +2. Mount EROFS on mntdir/. +3. Compare the md5sum between source dir and mntdir/. + +Later, we could also use it to refer original tar blobs. + +Signed-off-by: Jia Zhu +Signed-off-by: Xin Yin +Reviewed-by: Jingbo Xu +Acked-by: Chao Yu +Tested-by: Jiang Liu +Link: https://lore.kernel.org/r/20230302071751.48425-1-zhujia.zj@bytedance.com +[ Gao Xiang: refine commit message and use erofs_pos(). 
] +Signed-off-by: Gao Xiang +--- + fs/erofs/data.c | 8 ++++++-- + fs/erofs/internal.h | 1 + + fs/erofs/super.c | 5 ++++- + 3 files changed, 11 insertions(+), 3 deletions(-) + +diff --git a/fs/erofs/data.c b/fs/erofs/data.c +index 1c931e32d28e..03c6ffdfcbfb 100644 +--- a/fs/erofs/data.c ++++ b/fs/erofs/data.c +@@ -200,7 +200,6 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) + struct erofs_device_info *dif; + int id; + +- /* primary device by default */ + map->m_bdev = sb->s_bdev; + map->m_daxdev = EROFS_SB(sb)->dax_dev; + map->m_dax_part_off = EROFS_SB(sb)->dax_part_off; +@@ -213,12 +212,17 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) + up_read(&devs->rwsem); + return -ENODEV; + } ++ if (devs->flatdev) { ++ map->m_pa += erofs_pos(sb, dif->mapped_blkaddr); ++ up_read(&devs->rwsem); ++ return 0; ++ } + map->m_bdev = dif->bdev; + map->m_daxdev = dif->dax_dev; + map->m_dax_part_off = dif->dax_part_off; + map->m_fscache = dif->fscache; + up_read(&devs->rwsem); +- } else if (devs->extra_devices) { ++ } else if (devs->extra_devices && !devs->flatdev) { + down_read(&devs->rwsem); + idr_for_each_entry(&devs->tree, dif, id) { + erofs_off_t startoff, length; +diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h +index 9eff0c0ad2d7..e30a4fd43ccb 100644 +--- a/fs/erofs/internal.h ++++ b/fs/erofs/internal.h +@@ -81,6 +81,7 @@ struct erofs_dev_context { + struct rw_semaphore rwsem; + + unsigned int extra_devices; ++ bool flatdev; + }; + + struct erofs_fs_context { +diff --git a/fs/erofs/super.c b/fs/erofs/super.c +index dbffcdd696df..9e56a6fb2267 100644 +--- a/fs/erofs/super.c ++++ b/fs/erofs/super.c +@@ -252,7 +252,7 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, + if (IS_ERR(fscache)) + return PTR_ERR(fscache); + dif->fscache = fscache; +- } else { ++ } else if (!sbi->devs->flatdev) { + bdev = blkdev_get_by_path(dif->path, FMODE_READ | FMODE_EXCL, + sb->s_type); + if (IS_ERR(bdev)) +@@ -294,6 
+294,9 @@ static int erofs_scan_devices(struct super_block *sb, + if (!ondisk_extradevs) + return 0; + ++ if (!sbi->devs->extra_devices && !erofs_is_fscache_mode(sb)) ++ sbi->devs->flatdev = true; ++ + sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1; + pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE; + down_read(&sbi->devs->rwsem); +-- +2.34.1 + diff --git a/tools/packaging/scripts/lib.sh b/tools/packaging/scripts/lib.sh index 7e8fe05275a8..8cd813cc2a4f 100644 --- a/tools/packaging/scripts/lib.sh +++ b/tools/packaging/scripts/lib.sh @@ -6,7 +6,7 @@ # export GOPATH=${GOPATH:-${HOME}/go} -export BUILDER_REGISTRY="${BUILDER_REGISTRY:-quay.io/kata-containers/builders}" +export BUILDER_REGISTRY="${BUILDER_REGISTRY:-quay.io/kata-containers/builders}" export PUSH_TO_REGISTRY="${PUSH_TO_REGISTRY:-"no"}" this_script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" diff --git a/tools/packaging/static-build/qemu.blacklist b/tools/packaging/static-build/qemu.blacklist index 31f6ab23ce52..c5362686e56c 100644 --- a/tools/packaging/static-build/qemu.blacklist +++ b/tools/packaging/static-build/qemu.blacklist @@ -29,7 +29,6 @@ qemu_black_list=( */share/*/qemu_vga.ndrv */share/*/sgabios.bin */share/*/skiboot.lid -*/share/*/slof.bin */share/*/trace-events-all */share/*/u-boot* */share/*/vgabios* diff --git a/tools/packaging/static-build/qemu/Dockerfile b/tools/packaging/static-build/qemu/Dockerfile index f5e812c915af..5ed2d1ad9bbe 100644 --- a/tools/packaging/static-build/qemu/Dockerfile +++ b/tools/packaging/static-build/qemu/Dockerfile @@ -63,6 +63,7 @@ RUN apt-get update && apt-get upgrade -y && \ rsync \ zlib1g-dev${DPKG_ARCH} && \ if [ "${ARCH}" != s390x ]; then apt-get install -y --no-install-recommends libpmem-dev${DPKG_ARCH}; fi && \ + if [ "${ARCH}" = "ppc64le" ]; then apt-get install -y --no-install-recommends librados-dev librbd-dev; fi && \ GCC_ARCH="${ARCH}" && if [ "${ARCH}" = "ppc64le" ]; then GCC_ARCH="powerpc64le"; fi && \ if [ "${ARCH}" !=
"$(uname -m)" ]; then apt-get install --no-install-recommends -y gcc-"${GCC_ARCH}"-linux-gnu; fi && \ apt-get clean && rm -rf /var/lib/apt/lists/ diff --git a/tools/packaging/static-build/qemu/build-qemu.sh b/tools/packaging/static-build/qemu/build-qemu.sh index cab5f251b1f7..231922b2b809 100755 --- a/tools/packaging/static-build/qemu/build-qemu.sh +++ b/tools/packaging/static-build/qemu/build-qemu.sh @@ -24,6 +24,8 @@ scripts/git-submodule.sh update meson capstone ${kata_packaging_scripts}/patch_qemu.sh "${QEMU_VERSION_NUM}" "${kata_packaging_dir}/qemu/patches" if [ "$(uname -m)" != "${ARCH}" ] && [ "${ARCH}" == "s390x" ]; then PREFIX="${PREFIX}" ${kata_packaging_scripts}/configure-hypervisor.sh -s "${HYPERVISOR_NAME}" "${ARCH}" | xargs ./configure --with-pkgversion="${PKGVERSION}" --cc=s390x-linux-gnu-gcc --cross-prefix=s390x-linux-gnu- --prefix="${PREFIX}" --target-list=s390x-softmmu +#elif [ "${ARCH}" == "ppc64le" ]; then +# PREFIX="${PREFIX}" ${kata_packaging_scripts}/configure-hypervisor.sh "${HYPERVISOR_NAME}" "${ARCH}" | xargs ./configure --with-pkgversion="${PKGVERSION}" else PREFIX="${PREFIX}" ${kata_packaging_scripts}/configure-hypervisor.sh -s "${HYPERVISOR_NAME}" "${ARCH}" | xargs ./configure --with-pkgversion="${PKGVERSION}" fi diff --git a/tools/packaging/static-build/shim-v2/build.sh b/tools/packaging/static-build/shim-v2/build.sh index aeb48f2da8c9..24695c4f3d89 100755 --- a/tools/packaging/static-build/shim-v2/build.sh +++ b/tools/packaging/static-build/shim-v2/build.sh @@ -74,7 +74,12 @@ sudo docker run --rm -i -v "${repo_root_dir}:${repo_root_dir}" \ for vmm in ${VMM_CONFIGS}; do config_file="${DESTDIR}/${PREFIX}/share/defaults/kata-containers/configuration-${vmm}.toml" if [ -f ${config_file} ]; then - sudo sed -i -e '/^initrd =/d' ${config_file} + if [ ${ARCH} == "ppc64le" ]; then + sudo sed -i -e '/^image =/d' ${config_file} + sudo sed -i 's/^# \(initrd =.*\)/\1/g' ${config_file} + else + sudo sed -i -e '/^initrd =/d' ${config_file} + fi fi 
done diff --git a/tools/packaging/static-build/stratovirt/build-static-stratovirt.sh b/tools/packaging/static-build/stratovirt/build-static-stratovirt.sh new file mode 100755 index 000000000000..be0236604634 --- /dev/null +++ b/tools/packaging/static-build/stratovirt/build-static-stratovirt.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2023 Huawei Technologies Co.,Ltd. +# +# SPDX-License-Identifier: Apache-2.0 + +set -o errexit +set -o nounset +set -o pipefail + +ARCH=$(uname -m) + +# Currently, StratoVirt only supports x86_64 and aarch64. +[ "${ARCH}" != "x86_64" ] && [ "${ARCH}" != "aarch64" ] && exit + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "${script_dir}/../../scripts/lib.sh" + +info "Get stratovirt information from runtime versions.yaml" +stratovirt_url="${stratovirt_url:-}" +[ -n "$stratovirt_url" ] || stratovirt_url=$(get_from_kata_deps "assets.hypervisor.stratovirt.url") +[ -n "$stratovirt_url" ] || die "failed to get stratovirt url" + +stratovirt_version="${stratovirt_version:-}" +[ -n "$stratovirt_version" ] || stratovirt_version=$(get_from_kata_deps "assets.hypervisor.stratovirt.version") +[ -n "$stratovirt_version" ] || die "failed to get stratovirt version" + +pull_stratovirt_released_binary() { + file_name="stratovirt-static-${stratovirt_version##*v}-${ARCH}" + download_url="${stratovirt_url}/releases/download/${stratovirt_version}/${file_name}.tar.gz" + + curl -fL "${download_url}" -o "${file_name}.tar.gz" + mkdir -p static-stratovirt + tar zxvf "${file_name}.tar.gz" -C static-stratovirt +} + +pull_stratovirt_released_binary + diff --git a/utils/kata-manager.sh b/utils/kata-manager.sh index 01db532de105..bf211424a062 100755 --- a/utils/kata-manager.sh +++ b/utils/kata-manager.sh @@ -264,6 +264,7 @@ Options: -f : Force installation (use with care). -h : Show this help statement. -k : Specify Kata Containers version. + -K : Specify local Kata Containers tarball to install (takes priority over '-k').
-l : List installed and available versions only, then exit (uses network). -o : Only install Kata Containers. -r : Don't cleanup on failure (retain files). @@ -583,29 +584,36 @@ configure_containerd() install_kata() { local requested_version="${1:-}" + local kata_tarball="${2:-}" local project="$kata_project" - local version_desc="latest version" - [ -n "$requested_version" ] && version_desc="version $requested_version" - - info "Downloading $project release ($version_desc)" - - local results - results=$(github_download_package \ - "$kata_releases_url" \ - "$requested_version" \ - "$project") - - [ -z "$results" ] && die "Cannot download $project release file" - - local version - version=$(echo "$results"|cut -d: -f1) - - local file - file=$(echo "$results"|cut -d: -f2-) + local version="" + if [ -z "$kata_tarball" ] + then + local version_desc="latest version" + [ -n "$requested_version" ] && version_desc="version $requested_version" + + info "Downloading $project release ($version_desc)" + + local results + results=$(github_download_package \ + "$kata_releases_url" \ + "$requested_version" \ + "$project") + + [ -z "$results" ] && die "Cannot download $project release file" + + version=$(echo "$results"|cut -d: -f1) + + [ -z "$version" ] && die "Cannot determine $project resolved version" + + local file + file=$(echo "$results"|cut -d: -f2-) + else + file="$kata_tarball" + fi - [ -z "$version" ] && die "Cannot determine $project resolved version" [ -z "$file" ] && die "Cannot determine $project release file" # Allow the containerd service to find the Kata shim and users to find @@ -627,7 +635,12 @@ install_kata() [ -n "$unexpected" ] && die "File '$file' contains unexpected paths: '$unexpected'" - info "Installing $project release $version from $file" + if [ -n "$kata_tarball" ] + then + info "Installing $project release from $file" + else + info "Installing $project release $version from $file" + fi sudo tar -C / -xvf "${file}" @@ -680,11 +693,12 @@ 
configure_kata() handle_kata() { local version="${1:-}" + local tarball="${2:-}" - local enable_debug="${2:-}" + local enable_debug="${3:-}" [ -z "$enable_debug" ] && die "no enable debug value" - install_kata "$version" "$enable_debug" + install_kata "$version" "$tarball" configure_kata "$enable_debug" @@ -838,10 +852,10 @@ handle_installation() # These params can be blank local kata_version="${7:-}" local containerd_flavour="${8:-}" - local install_docker="${9:-}" [ -z "$install_docker" ] && die "no install docker value" + local kata_tarball="${10:-}" # The tool to be testing the installation with local tool="ctr" @@ -861,7 +875,7 @@ handle_installation() setup "$cleanup" "$force" "$skip_containerd" - handle_kata "$kata_version" "$enable_debug" + handle_kata "$kata_version" "$kata_tarball" "$enable_debug" [ "$skip_containerd" = "false" ] && \ handle_containerd \ @@ -957,8 +971,9 @@ handle_args() local kata_version="" local containerd_flavour="lts" + local kata_tarball="" - while getopts "c:dDfhk:lortT" opt "$@" + while getopts "c:dDfhk:K:lortT" opt "$@" do case "$opt" in c) containerd_flavour="$OPTARG" ;; @@ -967,6 +982,7 @@ handle_args() f) force="true" ;; h) usage; exit 0 ;; k) kata_version="$OPTARG" ;; + K) kata_tarball="$OPTARG" ;; l) list_versions='true' ;; o) skip_containerd="true" ;; r) cleanup="false" ;; @@ -995,7 +1011,8 @@ handle_args() "$only_run_test" \ "$kata_version" \ "$containerd_flavour" \ - "$install_docker" + "$install_docker" \ + "$kata_tarball" } main() diff --git a/versions.yaml b/versions.yaml index cf650b7adc6c..5dc0084c9a61 100644 --- a/versions.yaml +++ b/versions.yaml @@ -115,6 +115,11 @@ assets: url: "https://github.com/AMDESE/qemu" tag: "3b6a2b6b7466f6dea53243900b7516c3f29027b7" + stratovirt: + description: "StratoVirt is an lightweight opensource VMM" + url: "https://github.com/openeuler-mirror/stratovirt" + version: "v2.3.0" + image: description: | Root filesystem disk image used to boot the guest virtual